Initial commit: breakpilot-lehrer - Lehrer KI Platform

Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Benjamin Boenisch
2026-02-11 23:47:26 +01:00
commit 5a31f52310
1224 changed files with 425430 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
"""
BreakPilot Alerts Agent.
Automatisierte Relevanz-Filterung für Google Alerts.
Reduziert ~900 Alerts/Monat auf <120 Review/Keep.
Komponenten:
- ingestion: RSS Fetcher, Email Fetcher
- processing: Deduplication, Clustering, Relevance Scoring
- models: AlertItem, RelevanceProfile
- api: Inbox, Feedback Endpoints
"""
__version__ = "0.1.0"

View File

@@ -0,0 +1,20 @@
"""
Actions Module für Alerts Agent.
Führt Aktionen aus, die durch Regeln oder Scoring ausgelöst werden.
"""
from .base import ActionHandler, ActionResult, ActionType
from .email_action import EmailAction
from .webhook_action import WebhookAction
from .slack_action import SlackAction
from .dispatcher import ActionDispatcher
__all__ = [
"ActionHandler",
"ActionResult",
"ActionType",
"EmailAction",
"WebhookAction",
"SlackAction",
"ActionDispatcher",
]

View File

@@ -0,0 +1,123 @@
"""
Base Classes für Alert Actions.
Definiert das Interface für alle Action-Handler.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Any, Optional, List
from enum import Enum
class ActionType(str, Enum):
    """Closed set of action kinds that can be attached to an alert.

    Members also subclass ``str``, so each one compares equal to its
    lowercase value (e.g. ``ActionType.EMAIL == "email"``).
    """

    EMAIL = "email"
    WEBHOOK = "webhook"
    SLACK = "slack"
    TEAMS = "teams"
    TAG = "tag"
    ARCHIVE = "archive"
@dataclass
class ActionResult:
    """Outcome of a single executed action.

    ``timestamp`` defaults to the (naive) UTC time at construction and
    ``details`` to a fresh empty dict per instance.
    """

    success: bool
    action_type: ActionType
    message: str
    timestamp: datetime = field(default_factory=datetime.utcnow)
    details: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view suitable for logging or persistence.

        The enum is flattened to its string value and the timestamp to
        an ISO-8601 string; all other fields are passed through as-is.
        """
        serialised: Dict[str, Any] = {}
        serialised["success"] = self.success
        serialised["action_type"] = self.action_type.value
        serialised["message"] = self.message
        serialised["timestamp"] = self.timestamp.isoformat()
        serialised["details"] = self.details
        serialised["error"] = self.error
        return serialised
@dataclass
class AlertContext:
    """Alert data handed to an action handler.

    The first five fields are required; the relevance/rule fields default
    to ``None`` and ``tags`` to a fresh empty list per instance.
    """

    alert_id: str
    title: str
    url: str
    snippet: str
    topic_name: str
    relevance_score: Optional[float] = None
    relevance_decision: Optional[str] = None
    matched_rule: Optional[str] = None
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dict (used for templates and payloads).

        Values are passed through without copying, so ``tags`` in the
        result is the same list object as ``self.tags``.
        """
        exported = (
            "alert_id",
            "title",
            "url",
            "snippet",
            "topic_name",
            "relevance_score",
            "relevance_decision",
            "matched_rule",
            "tags",
        )
        return {name: getattr(self, name) for name in exported}
class ActionHandler(ABC):
    """Abstract interface implemented by every action handler.

    Subclasses must provide ``action_type`` and ``execute``. The two
    configuration hooks have permissive defaults and may be overridden.
    """

    @property
    @abstractmethod
    def action_type(self) -> ActionType:
        """Return the ActionType this handler is responsible for."""

    @abstractmethod
    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """Run the action.

        Args:
            context: Alert context carrying the alert's data.
            config: Action-specific configuration.

        Returns:
            An ActionResult describing success or failure.
        """

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Check an action configuration.

        Args:
            config: Configuration to validate.

        Returns:
            True when the configuration is acceptable. The base
            implementation accepts everything.
        """
        return True

    def get_required_config_fields(self) -> List[str]:
        """Return the names of mandatory configuration fields.

        Returns:
            A list of field names; empty in the base implementation.
        """
        return list()

View File

@@ -0,0 +1,232 @@
"""
Action Dispatcher für Alerts Agent.
Verteilt Aktionen an die entsprechenden Handler.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from .base import ActionHandler, ActionResult, ActionType, AlertContext
from .email_action import EmailAction
from .webhook_action import WebhookAction
from .slack_action import SlackAction
logger = logging.getLogger(__name__)
class ActionDispatcher:
    """Central registry that routes actions to their handlers.

    Handlers are keyed by their ``action_type``; registering a second
    handler for the same type replaces the first.
    """

    def __init__(self):
        """Create the dispatcher and register the email, webhook and Slack handlers."""
        self._handlers: Dict[ActionType, ActionHandler] = {}
        # Register the built-in handlers
        self.register_handler(EmailAction())
        self.register_handler(WebhookAction())
        self.register_handler(SlackAction())

    def register_handler(self, handler: ActionHandler) -> None:
        """Register (or replace) the handler for ``handler.action_type``.

        Args:
            handler: Handler instance to register.
        """
        self._handlers[handler.action_type] = handler
        logger.debug("Registered action handler: %s", handler.action_type.value)

    def get_handler(self, action_type: ActionType) -> Optional[ActionHandler]:
        """Return the handler registered for ``action_type``, or None.

        Args:
            action_type: Action type to look up.
        """
        return self._handlers.get(action_type)

    def list_handlers(self) -> List[str]:
        """Return the string values of all registered action types."""
        return [registered.value for registered in self._handlers]

    async def dispatch(
        self,
        action_type: str,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """Execute one action and always report the outcome as an ActionResult.

        Args:
            action_type: Action type as a string (matched case-insensitively
                against the ActionType values).
            context: Alert context.
            config: Action-specific configuration; ``None`` is treated as
                an empty configuration.

        Returns:
            The handler's ActionResult, or a failure result when the type is
            unknown, no handler is registered, or the config is invalid.
        """
        try:
            at = ActionType(action_type.lower())
        except (ValueError, AttributeError):
            # ValueError: string is not a known type.
            # AttributeError: action_type is not a string at all (e.g. None).
            return ActionResult(
                success=False,
                action_type=ActionType.WEBHOOK,  # Placeholder: the field needs some ActionType
                message=f"Unbekannter Aktionstyp: {action_type}",
                error="Unknown action type",
            )

        handler = self.get_handler(at)
        if not handler:
            return ActionResult(
                success=False,
                action_type=at,
                message=f"Kein Handler für {action_type} registriert",
                error="No handler registered",
            )

        # A missing config is validated as an empty one, so the caller gets
        # the "required fields" failure instead of an AttributeError.
        if config is None:
            config = {}

        # A validator that raises on a malformed config (wrong value types)
        # is treated the same as one that returns False.
        try:
            config_ok = handler.validate_config(config)
        except (AttributeError, TypeError) as exc:
            logger.warning("Config validation for %s raised: %s", action_type, exc)
            config_ok = False

        if not config_ok:
            required = handler.get_required_config_fields()
            return ActionResult(
                success=False,
                action_type=at,
                message=f"Ungültige Konfiguration für {action_type}",
                error=f"Required fields: {required}",
            )

        logger.info(
            "Dispatching %s action for alert %s", action_type, context.alert_id[:8]
        )
        return await handler.execute(context, config)

    async def dispatch_multiple(
        self,
        actions: List[Dict[str, Any]],
        context: AlertContext,
    ) -> List[ActionResult]:
        """Execute several actions one after another.

        Args:
            actions: Action dicts. The type is read from ``type`` (falling
                back to ``action_type``) and the config from ``config``
                (falling back to ``action_config``).
            context: Alert context shared by all actions.

        Returns:
            One ActionResult per entry, in input order.
        """
        results = []
        for action in actions:
            action_type = action.get("type", action.get("action_type", ""))
            config = action.get("config", action.get("action_config", {}))
            results.append(await self.dispatch(action_type, context, config))
        return results
# Module-level dispatcher instance, created on first use
_dispatcher: Optional[ActionDispatcher] = None


def get_dispatcher() -> ActionDispatcher:
    """Return the shared ActionDispatcher, creating it on the first call."""
    global _dispatcher
    if _dispatcher is not None:
        return _dispatcher
    _dispatcher = ActionDispatcher()
    return _dispatcher
async def execute_action(
    action_type: str,
    alert_id: str,
    title: str,
    url: str,
    snippet: str,
    topic_name: str,
    config: Dict[str, Any],
    relevance_score: Optional[float] = None,
    relevance_decision: Optional[str] = None,
    matched_rule: Optional[str] = None,
    tags: Optional[List[str]] = None,
) -> ActionResult:
    """Convenience wrapper: build an AlertContext and dispatch one action.

    The context is assembled from the individual arguments (a missing or
    empty ``tags`` becomes an empty list) and handed to the shared
    dispatcher together with ``action_type`` and ``config``.
    """
    effective_tags = tags if tags else []
    alert_context = AlertContext(
        alert_id=alert_id,
        title=title,
        url=url,
        snippet=snippet,
        topic_name=topic_name,
        relevance_score=relevance_score,
        relevance_decision=relevance_decision,
        matched_rule=matched_rule,
        tags=effective_tags,
    )
    return await get_dispatcher().dispatch(action_type, alert_context, config)
async def execute_rule_actions(
    alert_id: str,
    title: str,
    url: str,
    snippet: str,
    topic_name: str,
    rule_action: str,
    rule_config: Dict[str, Any],
    rule_name: str,
) -> ActionResult:
    """Execute the action of a matched rule.

    Only the external actions ("email", "webhook", "slack") are dispatched
    here; the comparison is case-sensitive. Any other value is reported as
    a successful no-op because those actions are handled elsewhere.

    Args:
        alert_id: Alert ID.
        title: Alert title.
        url: Alert URL.
        snippet: Alert snippet.
        topic_name: Topic name.
        rule_action: Action type of the rule.
        rule_config: Action configuration.
        rule_name: Name of the rule; passed on as ``matched_rule``.

    Returns:
        ActionResult of the dispatched action, or the no-op result.
    """
    external_actions = ("email", "webhook", "slack")
    if rule_action in external_actions:
        return await execute_action(
            action_type=rule_action,
            alert_id=alert_id,
            title=title,
            url=url,
            snippet=snippet,
            topic_name=topic_name,
            config=rule_config,
            matched_rule=rule_name,
        )

    # keep/drop/tag are handled directly by the rule engine
    return ActionResult(
        success=True,
        action_type=ActionType.TAG,  # Placeholder type for the no-op result
        message=f"Interne Aktion {rule_action} von Rule Engine behandelt",
    )

View File

@@ -0,0 +1,251 @@
"""
Email Action für Alerts Agent.
Sendet E-Mail-Benachrichtigungen für Alerts.
"""
import logging
from typing import Dict, Any, List
from datetime import datetime
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
# HTML template for alert e-mails, rendered with str.format().
# Literal CSS braces are doubled ({{ }}) so that only these placeholders are
# substituted: topic_name, url, title, snippet, decision_badge, score_display,
# rule_display, dashboard_url. The template itself performs no escaping.
EMAIL_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; line-height: 1.6; color: #333; }}
.container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
.header {{ background: #4A90E2; color: white; padding: 20px; border-radius: 8px 8px 0 0; }}
.content {{ background: #f9f9f9; padding: 20px; border: 1px solid #ddd; border-top: none; }}
.alert-card {{ background: white; padding: 15px; margin: 10px 0; border-radius: 4px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }}
.alert-title {{ font-size: 16px; font-weight: 600; color: #1a1a1a; margin-bottom: 8px; }}
.alert-title a {{ color: #4A90E2; text-decoration: none; }}
.alert-snippet {{ font-size: 14px; color: #666; margin-bottom: 8px; }}
.alert-meta {{ font-size: 12px; color: #999; }}
.badge {{ display: inline-block; padding: 2px 8px; border-radius: 12px; font-size: 11px; font-weight: 500; }}
.badge-keep {{ background: #d4edda; color: #155724; }}
.badge-review {{ background: #fff3cd; color: #856404; }}
.footer {{ padding: 15px; text-align: center; font-size: 12px; color: #999; }}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h2 style="margin: 0;">BreakPilot Alert</h2>
<p style="margin: 5px 0 0 0; opacity: 0.9;">{topic_name}</p>
</div>
<div class="content">
<div class="alert-card">
<div class="alert-title">
<a href="{url}">{title}</a>
</div>
<div class="alert-snippet">{snippet}</div>
<div class="alert-meta">
{decision_badge}
{score_display}
{rule_display}
</div>
</div>
</div>
<div class="footer">
Gesendet von BreakPilot Alerts Agent<br>
<a href="{dashboard_url}" style="color: #4A90E2;">Zur Inbox</a>
</div>
</div>
</body>
</html>
"""
class EmailAction(ActionHandler):
    """Send an HTML e-mail notification for an alert.

    Config keys read by this handler:
        to: recipient address, or a list of addresses (required)
        subject_prefix: subject prefix (default "[BreakPilot Alert]")
        include_snippet: set to False to omit the snippet (default True)
        dashboard_url: link target for the "Zur Inbox" footer link

    SMTP settings are read from the environment variables SMTP_HOST,
    SMTP_PORT, SMTP_USER, SMTP_PASS and SMTP_FROM.
    """

    @property
    def action_type(self) -> ActionType:
        """Return ActionType.EMAIL."""
        return ActionType.EMAIL

    def get_required_config_fields(self) -> List[str]:
        """Return the mandatory config keys (only ``to``)."""
        return ["to"]

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Return True when ``to`` is an address or a non-empty list of addresses.

        An "address" is any string containing "@". Non-string list entries
        make the config invalid instead of raising TypeError.
        """
        to = config.get("to")
        if isinstance(to, str):
            return "@" in to
        if isinstance(to, list):
            return bool(to) and all(
                isinstance(address, str) and "@" in address for address in to
            )
        return False

    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """Render and send the notification e-mail.

        Args:
            context: Alert context.
            config: E-mail configuration (to, subject_prefix, etc.).

        Returns:
            ActionResult; any exception is converted into a failure result.
        """
        try:
            # Normalise the recipient(s) to a list
            to = config.get("to")
            recipients = [to] if isinstance(to, str) else list(to)

            # Subject: prefix plus the first 50 characters of the title
            subject_prefix = config.get("subject_prefix", "[BreakPilot Alert]")
            subject = f"{subject_prefix} {context.title[:50]}"

            html_body = self._render_email(context, config)

            sent = await self._send_email(
                recipients=recipients,
                subject=subject,
                html_body=html_body,
            )
            if sent:
                return ActionResult(
                    success=True,
                    action_type=self.action_type,
                    message=f"E-Mail an {len(recipients)} Empfänger gesendet",
                    details={"recipients": recipients, "subject": subject},
                )
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="E-Mail konnte nicht gesendet werden",
                error="SMTP-Fehler",
            )
        except Exception as e:
            logger.error("Email action error: %s", e)
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="E-Mail-Fehler",
                error=str(e),
            )

    def _render_email(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> str:
        """Fill EMAIL_TEMPLATE with the alert data.

        All alert and config text is HTML-escaped before interpolation
        (quotes included, because url and dashboard_url land inside href
        attributes), so markup in an alert cannot alter the e-mail.
        """
        import html  # local import, like the other imports in this class

        escape = html.escape

        # Decision badge
        decision_badge = ""
        if context.relevance_decision:
            badge_class = (
                "badge-keep" if context.relevance_decision == "KEEP" else "badge-review"
            )
            decision_badge = (
                f'<span class="badge {badge_class}">'
                f"{escape(str(context.relevance_decision))}</span>"
            )

        # Score (numeric formatting, nothing to escape)
        score_display = ""
        if context.relevance_score is not None:
            score_display = f" | Score: {context.relevance_score:.0%}"

        # Matched rule
        rule_display = ""
        if context.matched_rule:
            rule_display = f" | Regel: {escape(str(context.matched_rule))}"

        # Snippet: truncate the raw text first so an HTML entity is never cut
        snippet = context.snippet[:200] if context.snippet else ""
        if config.get("include_snippet", True) is False:
            snippet = ""

        dashboard_url = config.get(
            "dashboard_url", "http://localhost:8000/studio#alerts"
        )

        return EMAIL_TEMPLATE.format(
            topic_name=escape(str(context.topic_name)),
            title=escape(str(context.title)),
            url=escape(str(context.url)),
            snippet=escape(str(snippet)),
            decision_badge=decision_badge,
            score_display=score_display,
            rule_display=rule_display,
            dashboard_url=escape(str(dashboard_url)),
        )

    async def _send_email(
        self,
        recipients: List[str],
        subject: str,
        html_body: str,
    ) -> bool:
        """Send the message over SMTP using aiosmtplib.

        Returns:
            True when the message was handed to the SMTP server, or when
            aiosmtplib is not installed (dev mode: the send is only logged).
            False on any other error.
        """
        import os

        smtp_host = os.getenv("SMTP_HOST", "localhost")
        smtp_port = int(os.getenv("SMTP_PORT", "587"))
        smtp_user = os.getenv("SMTP_USER", "")
        smtp_pass = os.getenv("SMTP_PASS", "")
        smtp_from = os.getenv("SMTP_FROM", "alerts@breakpilot.de")

        try:
            import aiosmtplib
            from email.mime.text import MIMEText
            from email.mime.multipart import MIMEMultipart

            # Build the message
            msg = MIMEMultipart("alternative")
            msg["Subject"] = subject
            msg["From"] = smtp_from
            msg["To"] = ", ".join(recipients)
            msg.attach(MIMEText(html_body, "html", "utf-8"))

            await aiosmtplib.send(
                msg,
                hostname=smtp_host,
                port=smtp_port,
                username=smtp_user if smtp_user else None,
                password=smtp_pass if smtp_pass else None,
                # STARTTLS is requested only on port 587
                start_tls=smtp_port == 587,
            )
            logger.info("Email sent to %s", recipients)
            return True
        except ImportError:
            logger.warning("aiosmtplib not installed. Email not sent.")
            # NOTE(review): deliberately reports success without sending so
            # development setups without aiosmtplib keep working.
            logger.info("[DEV] Would send email to %s: %s", recipients, subject)
            return True
        except Exception as e:
            logger.error("SMTP error: %s", e)
            return False

View File

@@ -0,0 +1,198 @@
"""
Slack Action für Alerts Agent.
Sendet Slack-Nachrichten für Alerts via Incoming Webhooks.
"""
import logging
from typing import Dict, Any, List
import httpx
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
class SlackAction(ActionHandler):
    """Post alert notifications to Slack through an incoming webhook.

    Config keys read by this handler:
        webhook_url: incoming-webhook URL (required)
        channel: optional channel override
        username: bot display name (default "BreakPilot Alerts")
        icon_emoji: bot icon (default ":bell:")
        dashboard_url: link target for the footer link
    """

    # Slack limits the text of a header block to 150 characters.
    _HEADER_MAX_CHARS = 150

    @property
    def action_type(self) -> ActionType:
        """Return ActionType.SLACK."""
        return ActionType.SLACK

    def get_required_config_fields(self) -> List[str]:
        """Return the mandatory config keys (only ``webhook_url``)."""
        return ["webhook_url"]

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Return True when ``webhook_url`` is an acceptable URL string.

        Accepted: any string containing "hooks.slack.com" or starting with
        "https://". A missing or non-string value is invalid instead of
        raising.
        """
        url = config.get("webhook_url", "")
        if not isinstance(url, str):
            return False
        return "hooks.slack.com" in url or url.startswith("https://")

    @staticmethod
    def _escape_mrkdwn(text: Any) -> str:
        """Escape the three control characters of Slack mrkdwn (&, <, >).

        "&" is replaced first so the entities produced for "<" and ">" are
        not escaped a second time.
        """
        return (
            str(text)
            .replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
        )

    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """Send the Slack message.

        Args:
            context: Alert context.
            config: Slack configuration (webhook_url, channel, etc.).

        Returns:
            ActionResult; success requires HTTP 200 with the body "ok".
            Any exception is converted into a failure result.
        """
        try:
            webhook_url = config.get("webhook_url")
            payload = self._build_slack_payload(context, config)

            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.post(
                    webhook_url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                )

            # Slack answers a successful webhook call with the plain text "ok"
            success = response.status_code == 200 and response.text == "ok"
            return ActionResult(
                success=success,
                action_type=self.action_type,
                message="Slack-Nachricht gesendet" if success else "Slack-Fehler",
                details={
                    "status_code": response.status_code,
                    "response": response.text[:100],
                },
                error=None if success else response.text,
            )
        except Exception as e:
            logger.error("Slack action error: %s", e)
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Slack-Fehler",
                error=str(e),
            )

    def _build_slack_payload(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Build the webhook payload (Block Kit blocks plus fallback text).

        Alert text placed into mrkdwn fields is escaped so "<", ">" and "&"
        in a title, rule name or tag cannot break the link or formatting.
        The link URL itself is passed through unchanged.
        """
        esc = self._escape_mrkdwn

        # Base payload
        payload: Dict[str, Any] = {
            "username": config.get("username", "BreakPilot Alerts"),
            "icon_emoji": config.get("icon_emoji", ":bell:"),
        }

        # Override the channel when one is configured
        if config.get("channel"):
            payload["channel"] = config["channel"]

        blocks: List[Dict[str, Any]] = [
            # Header (plain_text, truncated to Slack's header limit)
            {
                "type": "header",
                "text": {
                    "type": "plain_text",
                    "text": f"📰 {context.topic_name}"[: self._HEADER_MAX_CHARS],
                    "emoji": True,
                }
            },
            # Alert title as a link
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"*<{context.url}|{esc(context.title)}>*",
                }
            },
        ]

        # Snippet, shortened to 200 characters with an ellipsis
        if context.snippet:
            snippet = context.snippet[:200]
            if len(context.snippet) > 200:
                snippet += "..."
            blocks.append({
                "type": "section",
                "text": {
                    "type": "plain_text",
                    "text": snippet,
                    "emoji": False,
                }
            })

        # Context fields (score, decision, rule, tags)
        fields = []
        if context.relevance_score is not None:
            score_emoji = "🟢" if context.relevance_score >= 0.7 else "🟡" if context.relevance_score >= 0.4 else "🔴"
            fields.append({
                "type": "mrkdwn",
                "text": f"*Score:* {score_emoji} {context.relevance_score:.0%}",
            })
        if context.relevance_decision:
            decision_emoji = {"KEEP": "", "DROP": "", "REVIEW": "👀"}.get(context.relevance_decision, "")
            fields.append({
                "type": "mrkdwn",
                "text": f"*Decision:* {decision_emoji} {esc(context.relevance_decision)}",
            })
        if context.matched_rule:
            fields.append({
                "type": "mrkdwn",
                "text": f"*Regel:* {esc(context.matched_rule)}",
            })
        if context.tags:
            fields.append({
                "type": "mrkdwn",
                "text": f"*Tags:* {', '.join(esc(tag) for tag in context.tags)}",
            })
        if fields:
            blocks.append({
                "type": "section",
                "fields": fields[:10],  # at most 10 fields per section
            })

        blocks.append({"type": "divider"})

        # Footer with a link to the inbox
        dashboard_url = config.get("dashboard_url", "http://localhost:8000/studio#alerts")
        blocks.append({
            "type": "context",
            "elements": [
                {
                    "type": "mrkdwn",
                    "text": f"<{dashboard_url}|Zur Alerts Inbox> | Gesendet von BreakPilot",
                }
            ]
        })

        payload["blocks"] = blocks
        # Fallback text shown in notifications
        payload["text"] = f"Neuer Alert: {esc(context.title)}"
        return payload

View File

@@ -0,0 +1,135 @@
"""
Webhook Action für Alerts Agent.
Sendet HTTP-Webhooks für Alerts.
"""
import logging
from typing import Dict, Any, List
import httpx
from .base import ActionHandler, ActionResult, ActionType, AlertContext
logger = logging.getLogger(__name__)
class WebhookAction(ActionHandler):
    """Send an HTTP webhook for an alert.

    Config keys read by this handler:
        url: webhook URL (required, http:// or https://)
        method: HTTP method (default POST)
        headers: additional request headers
        timeout: request timeout passed to httpx (default 30)
        include_full_context: send the full alert context (default True)
    """

    @property
    def action_type(self) -> ActionType:
        """Return ActionType.WEBHOOK."""
        return ActionType.WEBHOOK

    def get_required_config_fields(self) -> List[str]:
        """Return the mandatory config keys (only ``url``)."""
        return ["url"]

    def validate_config(self, config: Dict[str, Any]) -> bool:
        """Return True when ``url`` is a string starting with http:// or https://.

        A missing or non-string value is invalid instead of raising.
        """
        url = config.get("url", "")
        if not isinstance(url, str):
            return False
        return url.startswith(("http://", "https://"))

    async def execute(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> ActionResult:
        """Send the webhook request.

        Args:
            context: Alert context.
            config: Webhook configuration (url, method, headers, timeout).

        Returns:
            ActionResult; success means a 2xx response. Timeouts and other
            exceptions are converted into failure results.
        """
        try:
            url = config.get("url")
            method = config.get("method", "POST").upper()
            # Work on a copy: the defaults added below must not leak into the
            # caller's config, which may be reused for later dispatches.
            headers = dict(config.get("headers") or {})
            timeout = config.get("timeout", 30)

            payload = self._build_payload(context, config)

            # Default headers (caller-supplied values win)
            headers.setdefault("Content-Type", "application/json")
            headers.setdefault("User-Agent", "BreakPilot-AlertsAgent/1.0")

            async with httpx.AsyncClient(timeout=timeout) as client:
                if method == "POST":
                    response = await client.post(url, json=payload, headers=headers)
                elif method == "PUT":
                    response = await client.put(url, json=payload, headers=headers)
                else:
                    # NOTE(review): every other method value is sent as a GET
                    # with the payload as query parameters.
                    response = await client.get(url, params=payload, headers=headers)

            success = 200 <= response.status_code < 300
            return ActionResult(
                success=success,
                action_type=self.action_type,
                message=f"Webhook {method} {url} - Status {response.status_code}",
                details={
                    "url": url,
                    "method": method,
                    "status_code": response.status_code,
                    "response_length": len(response.text),
                },
                error=None if success else f"HTTP {response.status_code}",
            )
        except httpx.TimeoutException:
            logger.error("Webhook timeout: %s", config.get("url"))
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Webhook Timeout",
                error="Request timed out",
            )
        except Exception as e:
            logger.error("Webhook error: %s", e)
            return ActionResult(
                success=False,
                action_type=self.action_type,
                message="Webhook-Fehler",
                error=str(e),
            )

    def _build_payload(
        self,
        context: AlertContext,
        config: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Build the webhook payload.

        With ``include_full_context`` truthy (the default) the whole alert
        context is nested under "alert"; otherwise only id, title and url
        are sent at the top level.
        """
        if config.get("include_full_context", True):
            return {
                "event": "alert.matched",
                "alert": context.to_dict(),
                "timestamp": self._get_timestamp(),
            }
        return {
            "event": "alert.matched",
            "alert_id": context.alert_id,
            "title": context.title,
            "url": context.url,
            "timestamp": self._get_timestamp(),
        }

    def _get_timestamp(self) -> str:
        """Return the current UTC time as an ISO string with a "Z" suffix.

        Uses an aware "now" with the tzinfo stripped, which yields the same
        text as the deprecated ``datetime.utcnow()``.
        """
        from datetime import datetime, timezone

        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        return now_utc.isoformat() + "Z"

View File

@@ -0,0 +1,17 @@
"""Alert Agent API."""
from fastapi import APIRouter
from .routes import router as main_router
from .topics import router as topics_router
from .rules import router as rules_router
# Combined router: everything included below is exposed under the "/alerts" prefix
router = APIRouter(prefix="/alerts", tags=["Alerts Agent"])
# Mount the sub-routers (main routes, topics, rules) on the combined router
router.include_router(main_router)
router.include_router(topics_router)
router.include_router(rules_router)
__all__ = ["router"]

View File

@@ -0,0 +1,551 @@
"""
API Routes fuer Alert Digests (Wochenzusammenfassungen).
Endpoints:
- GET /digests - Liste aller Digests fuer den User
- GET /digests/{id} - Digest-Details
- GET /digests/{id}/pdf - PDF-Download
- POST /digests/generate - Digest manuell generieren
- POST /digests/{id}/send-email - Digest per E-Mail versenden
"""
import uuid
import io
from typing import Optional, List
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, HTTPException, Query, Response
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
AlertDigestDB, UserAlertSubscriptionDB, DigestStatusEnum
)
from ..processing.digest_generator import DigestGenerator
router = APIRouter(prefix="/digests", tags=["digests"])
# ============================================================================
# Request/Response Models
# ============================================================================
class DigestListItem(BaseModel):
    """Compact digest summary used in list responses."""

    id: str
    period_start: datetime
    period_end: datetime
    total_alerts: int
    critical_count: int
    urgent_count: int
    status: str
    created_at: datetime
class DigestDetail(BaseModel):
    """Full digest representation including the rendered HTML summary."""

    id: str
    subscription_id: Optional[str]
    user_id: str
    period_start: datetime
    period_end: datetime
    summary_html: str
    summary_pdf_url: Optional[str]
    total_alerts: int
    critical_count: int
    urgent_count: int
    important_count: int
    review_count: int
    info_count: int
    status: str
    sent_at: Optional[datetime]
    created_at: datetime
class DigestListResponse(BaseModel):
    """Response body of the digest list endpoint: one page of items plus the total count."""

    digests: List[DigestListItem]
    total: int
class GenerateDigestRequest(BaseModel):
    """Request body for manual digest generation.

    ``weeks_back`` is constrained to 1..4 and defaults to 1;
    ``force_regenerate`` defaults to False.
    """

    weeks_back: int = Field(default=1, ge=1, le=4, description="Wochen zurueck")
    force_regenerate: bool = Field(default=False, description="Vorhandenen Digest ueberschreiben")
class GenerateDigestResponse(BaseModel):
    """Response body of the digest generation endpoint."""

    status: str
    digest_id: Optional[str]
    message: str
class SendEmailRequest(BaseModel):
    """Request body for sending a digest by e-mail; the address is optional."""

    email: Optional[str] = Field(default=None, description="E-Mail-Adresse (optional, sonst aus Subscription)")
class SendEmailResponse(BaseModel):
    """Response body of the send-email endpoint."""

    status: str
    sent_to: str
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """Return the ID of the requesting user.

    TODO: evaluate the JWT token; for now this always returns a fixed
    placeholder user.
    """
    placeholder_user = "demo-user"
    return placeholder_user
def _digest_to_list_item(digest: AlertDigestDB) -> DigestListItem:
    """Convert a digest DB row into its list-item model.

    Falsy counters become 0 and a missing status becomes "pending".
    """
    status_text = digest.status.value if digest.status else "pending"
    counters = {
        "total_alerts": digest.total_alerts or 0,
        "critical_count": digest.critical_count or 0,
        "urgent_count": digest.urgent_count or 0,
    }
    return DigestListItem(
        id=digest.id,
        period_start=digest.period_start,
        period_end=digest.period_end,
        status=status_text,
        created_at=digest.created_at,
        **counters,
    )
def _digest_to_detail(digest: AlertDigestDB) -> DigestDetail:
    """Convert a digest DB row into its detail model.

    Falsy counters become 0, a missing HTML summary becomes "" and a
    missing status becomes "pending".
    """
    status_text = digest.status.value if digest.status else "pending"
    counter_names = (
        "total_alerts",
        "critical_count",
        "urgent_count",
        "important_count",
        "review_count",
        "info_count",
    )
    counters = {name: getattr(digest, name) or 0 for name in counter_names}
    return DigestDetail(
        id=digest.id,
        subscription_id=digest.subscription_id,
        user_id=digest.user_id,
        period_start=digest.period_start,
        period_end=digest.period_end,
        summary_html=digest.summary_html or "",
        summary_pdf_url=digest.summary_pdf_url,
        status=status_text,
        sent_at=digest.sent_at,
        created_at=digest.created_at,
        **counters,
    )
# ============================================================================
# Endpoints
# ============================================================================
@router.get("", response_model=DigestListResponse)
async def list_digests(
    limit: int = Query(10, ge=1, le=50),
    offset: int = Query(0, ge=0),
    db: DBSession = Depends(get_db)
):
    """
    List the current user's digests, newest first.

    ``total`` is the count of all of the user's digests, not only the
    returned page.
    """
    user_id = get_user_id_from_request()
    user_digests = (
        db.query(AlertDigestDB)
        .filter(AlertDigestDB.user_id == user_id)
        .order_by(AlertDigestDB.created_at.desc())
    )
    total = user_digests.count()
    page = user_digests.offset(offset).limit(limit).all()
    items = [_digest_to_list_item(row) for row in page]
    return DigestListResponse(digests=items, total=total)
@router.get("/latest", response_model=DigestDetail)
async def get_latest_digest(
    db: DBSession = Depends(get_db)
):
    """
    Return the user's most recently created digest.

    Responds with 404 when the user has no digest.
    """
    user_id = get_user_id_from_request()
    newest_first = (
        db.query(AlertDigestDB)
        .filter(AlertDigestDB.user_id == user_id)
        .order_by(AlertDigestDB.created_at.desc())
    )
    digest = newest_first.first()
    if digest:
        return _digest_to_detail(digest)
    raise HTTPException(status_code=404, detail="Kein Digest vorhanden")
@router.get("/{digest_id}", response_model=DigestDetail)
async def get_digest(
    digest_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Return the details of one digest.

    The digest must belong to the current user; otherwise the response
    is 404.
    """
    user_id = get_user_id_from_request()
    owned_match = db.query(AlertDigestDB).filter(
        AlertDigestDB.id == digest_id,
        AlertDigestDB.user_id == user_id
    )
    digest = owned_match.first()
    if digest:
        return _digest_to_detail(digest)
    raise HTTPException(status_code=404, detail="Digest nicht gefunden")
async def _digest_pdf_response(digest: AlertDigestDB) -> StreamingResponse:
    """Render a digest's HTML summary to PDF and wrap it as a download.

    Raises:
        HTTPException: 400 when the digest has no HTML content, 500 when
            PDF generation fails.
    """
    if not digest.summary_html:
        raise HTTPException(status_code=400, detail="Digest hat keinen Inhalt")

    try:
        pdf_bytes = await generate_pdf_from_html(digest.summary_html)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"PDF-Generierung fehlgeschlagen: {str(e)}",
        ) from e

    # File name built from the digest period
    filename = f"wochenbericht_{digest.period_start.strftime('%Y%m%d')}_{digest.period_end.strftime('%Y%m%d')}.pdf"
    return StreamingResponse(
        io.BytesIO(pdf_bytes),
        media_type="application/pdf",
        headers={
            "Content-Disposition": f"attachment; filename={filename}"
        }
    )


# NOTE: "/latest/pdf" must be registered BEFORE "/{digest_id}/pdf". Routes are
# matched in declaration order, so the parameterised path would otherwise
# capture "latest" as a digest ID and answer 404.
@router.get("/latest/pdf")
async def get_latest_digest_pdf(
    db: DBSession = Depends(get_db)
):
    """
    Download the PDF of the user's most recently created digest.
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.user_id == user_id
    ).order_by(AlertDigestDB.created_at.desc()).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Kein Digest vorhanden")
    return await _digest_pdf_response(digest)


@router.get("/{digest_id}/pdf")
async def get_digest_pdf(
    digest_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Generate and download the PDF version of one digest.

    The digest must belong to the current user; otherwise the response
    is 404.
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.id == digest_id,
        AlertDigestDB.user_id == user_id
    ).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Digest nicht gefunden")
    return await _digest_pdf_response(digest)
@router.post("/generate", response_model=GenerateDigestResponse)
async def generate_digest(
    request: GenerateDigestRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Generate a new digest on demand.

    Digests are normally generated automatically every week; this route
    allows manual generation for tests or on-demand use. Unless
    ``force_regenerate`` is set, an existing digest for the period is
    reported back with status "exists" instead of generating a new one.
    """
    user_id = get_user_id_from_request()
    weeks_back = 1 if not request else request.weeks_back
    force = bool(request and request.force_regenerate)

    # Period: ends at the start of the current week (same time of day as
    # now) and reaches back weeks_back weeks
    now = datetime.utcnow()
    period_end = now - timedelta(days=now.weekday())
    period_start = period_end - timedelta(weeks=weeks_back)

    # Look for an existing digest inside the period, with one day of slack
    one_day = timedelta(days=1)
    existing = db.query(AlertDigestDB).filter(
        AlertDigestDB.user_id == user_id,
        AlertDigestDB.period_start >= period_start - one_day,
        AlertDigestDB.period_end <= period_end + one_day
    ).first()
    if existing and not force:
        return GenerateDigestResponse(
            status="exists",
            digest_id=existing.id,
            message="Digest fuer diesen Zeitraum existiert bereits"
        )

    generator = DigestGenerator(db)
    try:
        digest = await generator.generate_weekly_digest(user_id, weeks_back)
        if not digest:
            return GenerateDigestResponse(
                status="empty",
                digest_id=None,
                message="Keine Alerts fuer diesen Zeitraum vorhanden"
            )
        return GenerateDigestResponse(
            status="success",
            digest_id=digest.id,
            message="Digest erfolgreich generiert"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Fehler bei Digest-Generierung: {str(e)}")
@router.post("/{digest_id}/send-email", response_model=SendEmailResponse)
async def send_digest_email(
    digest_id: str,
    request: SendEmailRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Send a digest by e-mail.

    The recipient is taken from the request body when given, otherwise
    from the notification address of the digest's subscription. The
    digest status is set to SENT on success and FAILED on error.
    The actual delivery is delegated to ``send_digest_by_email``, which
    is defined outside the visible part of this module.
    """
    user_id = get_user_id_from_request()
    digest = db.query(AlertDigestDB).filter(
        AlertDigestDB.id == digest_id,
        AlertDigestDB.user_id == user_id
    ).first()
    if not digest:
        raise HTTPException(status_code=404, detail="Digest nicht gefunden")

    # Resolve the recipient address
    email = request.email if request and request.email else None
    if email is None:
        # Fall back to the subscription's notification address
        subscription = db.query(UserAlertSubscriptionDB).filter(
            UserAlertSubscriptionDB.id == digest.subscription_id
        ).first()
        if subscription:
            email = subscription.notification_email
    if not email:
        raise HTTPException(status_code=400, detail="Keine E-Mail-Adresse angegeben")

    try:
        await send_digest_by_email(digest, email)
        # Record the successful delivery
        digest.status = DigestStatusEnum.SENT
        digest.sent_at = datetime.utcnow()
        db.commit()
        return SendEmailResponse(
            status="success",
            sent_to=email,
            message="E-Mail erfolgreich versendet"
        )
    except Exception as e:
        digest.status = DigestStatusEnum.FAILED
        db.commit()
        raise HTTPException(status_code=500, detail=f"E-Mail-Versand fehlgeschlagen: {str(e)}")
# ============================================================================
# PDF Generation
# ============================================================================
async def generate_pdf_from_html(html_content: str) -> bytes:
    """
    Render HTML to PDF and return the PDF bytes.

    Backends are tried in this order: WeasyPrint, pdfkit (wkhtmltopdf),
    xhtml2pdf. A backend that is not installed or cannot be loaded is
    skipped and the next one is tried.

    Raises:
        RuntimeError: if xhtml2pdf is used and reports a conversion error.
        ImportError: if none of the backends is available.
    """
    # 1) WeasyPrint (preferred). Its import can also fail with OSError when
    #    the native libraries it depends on are missing.
    try:
        from weasyprint import HTML
    except (ImportError, OSError):
        HTML = None
    if HTML is not None:
        return HTML(string=html_content).write_pdf()

    # 2) wkhtmltopdf via pdfkit. pdfkit raises OSError when the wkhtmltopdf
    #    executable is missing or fails, so fall through in that case.
    try:
        import pdfkit
    except ImportError:
        pdfkit = None
    if pdfkit is not None:
        try:
            return pdfkit.from_string(html_content, False)
        except OSError:
            pass

    # 3) xhtml2pdf
    try:
        from xhtml2pdf import pisa
    except ImportError:
        pisa = None
    if pisa is not None:
        result = io.BytesIO()
        status = pisa.CreatePDF(io.StringIO(html_content), dest=result)
        # CreatePDF signals problems through the status object, not by raising.
        if status.err:
            raise RuntimeError("xhtml2pdf konnte das HTML nicht in ein PDF umwandeln")
        return result.getvalue()

    # No backend could be used.
    raise ImportError(
        "Keine PDF-Bibliothek verfuegbar. "
        "Installieren Sie: pip install weasyprint oder pip install pdfkit oder pip install xhtml2pdf"
    )
# ============================================================================
# Email Sending
# ============================================================================
async def send_digest_by_email(digest: AlertDigestDB, recipient_email: str):
    """
    Send a digest to ``recipient_email``.

    The message carries a plain-text summary, the digest HTML (if present)
    and, when a PDF can be rendered from that HTML, the PDF as attachment.

    Delivery uses the SMTP server configured through the environment
    variables SMTP_HOST, SMTP_PORT, SMTP_USER, SMTP_PASS and SMTP_FROM
    (defaults: localhost, port 25, no login). Port 465 uses implicit TLS,
    port 587 uses STARTTLS. If SMTP delivery fails and SENDGRID_API_KEY is
    set, the message is sent through SendGrid instead; otherwise the SMTP
    error is re-raised.
    """
    import logging
    import os
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    from email.mime.application import MIMEApplication

    log = logging.getLogger(__name__)

    # Outer container is multipart/mixed so the PDF is a real attachment;
    # the text and HTML bodies are alternatives of each other inside it.
    msg = MIMEMultipart('mixed')
    msg['Subject'] = f"Wochenbericht: {digest.period_start.strftime('%d.%m.%Y')} - {digest.period_end.strftime('%d.%m.%Y')}"
    msg['From'] = os.getenv('SMTP_FROM', 'alerts@breakpilot.app')
    msg['To'] = recipient_email

    body = MIMEMultipart('alternative')
    msg.attach(body)

    # Plain-text version
    text_content = f"""
BreakPilot Alerts - Wochenbericht
Zeitraum: {digest.period_start.strftime('%d.%m.%Y')} - {digest.period_end.strftime('%d.%m.%Y')}
Gesamt: {digest.total_alerts} Meldungen
Kritisch: {digest.critical_count}
Dringend: {digest.urgent_count}
Oeffnen Sie die HTML-Version fuer die vollstaendige Uebersicht.
---
Diese E-Mail wurde automatisch von BreakPilot Alerts generiert.
"""
    body.attach(MIMEText(text_content, 'plain', 'utf-8'))

    if digest.summary_html:
        # HTML version
        body.attach(MIMEText(digest.summary_html, 'html', 'utf-8'))

        # PDF attachment is best effort: the mail is still sent without it.
        try:
            pdf_bytes = await generate_pdf_from_html(digest.summary_html)
        except Exception:
            log.warning("Digest PDF attachment skipped", exc_info=True)
        else:
            pdf_attachment = MIMEApplication(pdf_bytes, _subtype='pdf')
            pdf_attachment.add_header(
                'Content-Disposition', 'attachment',
                filename=f"wochenbericht_{digest.period_start.strftime('%Y%m%d')}.pdf"
            )
            msg.attach(pdf_attachment)

    # Send
    smtp_host = os.getenv('SMTP_HOST', 'localhost')
    smtp_port = int(os.getenv('SMTP_PORT', '25'))
    smtp_user = os.getenv('SMTP_USER', '')
    smtp_pass = os.getenv('SMTP_PASS', '')
    smtp_class = smtplib.SMTP_SSL if smtp_port == 465 else smtplib.SMTP
    try:
        # The context manager closes the connection even when login or
        # sending raises.
        with smtp_class(smtp_host, smtp_port) as server:
            if smtp_port == 587:
                server.starttls()
            if smtp_user and smtp_pass:
                server.login(smtp_user, smtp_pass)
            server.send_message(msg)
    except Exception:
        # Fallback: SendGrid API
        sendgrid_key = os.getenv('SENDGRID_API_KEY')
        if not sendgrid_key:
            raise
        log.warning("SMTP delivery failed, falling back to SendGrid", exc_info=True)
        await send_via_sendgrid(msg, sendgrid_key)


async def send_via_sendgrid(msg, api_key: str):
    """
    Fallback: deliver ``msg`` through the SendGrid v3 Mail Send API.

    The MIME tree is walked so the function works for any nesting: the first
    text/plain and text/html parts become the content (decoded from their
    transfer encoding), parts marked as attachment are forwarded as
    attachments.

    Raises:
        RuntimeError: if SendGrid answers with a status code >= 400.
    """
    import base64
    import httpx

    bodies = {}
    attachments = []
    for part in msg.walk():
        if part.is_multipart():
            continue
        # decode=True undoes base64 / quoted-printable transfer encoding.
        raw = part.get_payload(decode=True) or b""
        if part.get_content_disposition() == "attachment":
            attachments.append({
                "content": base64.b64encode(raw).decode("ascii"),
                "type": part.get_content_type(),
                "filename": part.get_filename() or "attachment",
                "disposition": "attachment",
            })
            continue
        content_type = part.get_content_type()
        if content_type in ("text/plain", "text/html") and content_type not in bodies:
            charset = part.get_content_charset() or "utf-8"
            bodies[content_type] = raw.decode(charset, errors="replace")

    # Plain text goes first; bodies that are missing or empty are left out.
    content = [
        {"type": content_type, "value": bodies[content_type]}
        for content_type in ("text/plain", "text/html")
        if bodies.get(content_type)
    ]

    payload = {
        "personalizations": [{"to": [{"email": msg['To']}]}],
        "from": {"email": msg['From']},
        "subject": msg['Subject'],
        "content": content,
    }
    if attachments:
        payload["attachments"] = attachments

    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://api.sendgrid.com/v3/mail/send",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            },
            json=payload,
        )
    if response.status_code >= 400:
        raise RuntimeError(f"SendGrid error: {response.status_code}")

View File

@@ -0,0 +1,510 @@
"""
API Routes für Alerts Agent.
Endpoints:
- POST /alerts/ingest - Manuell Alerts importieren
- POST /alerts/run - Scoring Pipeline starten
- GET /alerts/inbox - Inbox Items abrufen
- POST /alerts/feedback - Relevanz-Feedback geben
- GET /alerts/profile - User Relevance Profile
- PUT /alerts/profile - Profile aktualisieren
"""
import os
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from ..models.alert_item import AlertItem, AlertStatus
from ..models.relevance_profile import RelevanceProfile, PriorityItem
from ..processing.relevance_scorer import RelevanceDecision, RelevanceScorer
router = APIRouter(prefix="/alerts", tags=["alerts"])

# LLM scorer configuration, read from environment variables at import time.
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8000/llm")
# Only the first entry of the comma-separated LLM_API_KEYS list is used;
# an unset or empty variable yields "".
LLM_API_KEY = os.getenv("LLM_API_KEYS", "").split(",")[0]
ALERTS_USE_LLM = os.getenv("ALERTS_USE_LLM", "false").lower() == "true"

# ============================================================================
# In-memory storage (to be replaced by a database later)
# ============================================================================
_alerts_store: dict[str, AlertItem] = {}
_profile_store: dict[str, RelevanceProfile] = {}
# ============================================================================
# Request/Response Models
# ============================================================================
class AlertIngestRequest(BaseModel):
    """Request body for manually importing a single alert."""
    # Required, 1-500 characters.
    title: str = Field(..., min_length=1, max_length=500)
    # Required and non-empty; no further URL validation is done here.
    url: str = Field(..., min_length=1)
    # Optional excerpt, at most 2000 characters.
    snippet: Optional[str] = Field(default=None, max_length=2000)
    # Label of the topic the alert is filed under.
    topic_label: str = Field(default="Manual Import")
    # Optional publication timestamp.
    published_at: Optional[datetime] = None
class AlertIngestResponse(BaseModel):
    """Response for an alert import."""
    # ID of the stored alert.
    id: str
    status: str
    # Human-readable confirmation text.
    message: str
class AlertRunRequest(BaseModel):
    """Request body for starting the scoring pipeline."""
    # Maximum number of alerts to score in one run (1-200).
    limit: int = Field(default=50, ge=1, le=200)
    # When True, alerts that are already scored are not scored again.
    skip_scored: bool = Field(default=True)
class AlertRunResponse(BaseModel):
    """Result summary of a scoring pipeline run."""
    # Number of alerts selected for this run.
    processed: int
    # Counts per decision.
    keep: int
    drop: int
    review: int
    # Number of alerts whose scoring reported an error.
    errors: int
    # Duration of the run in milliseconds.
    duration_ms: int
class InboxItem(BaseModel):
    """A single item in the inbox."""
    id: str
    title: str
    url: str
    snippet: Optional[str]
    topic_label: str
    published_at: Optional[datetime]
    # Scoring results; None as long as the alert has not been scored.
    relevance_score: Optional[float]
    relevance_decision: Optional[str]
    relevance_summary: Optional[str]
    # String value of the alert's status.
    status: str
class InboxResponse(BaseModel):
    """Response for an inbox query (one page of results)."""
    # Items of the requested page.
    items: list[InboxItem]
    # Number of matching items across all pages.
    total: int
    page: int
    page_size: int
class FeedbackRequest(BaseModel):
    """Request body for relevance feedback on an alert."""
    # ID of the alert the feedback refers to.
    alert_id: str
    is_relevant: bool
    # Optional free-text explanation.
    reason: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
class FeedbackResponse(BaseModel):
    """Response for submitted feedback."""
    success: bool
    message: str
    # Whether the relevance profile was updated from the feedback.
    profile_updated: bool
class ProfilePriorityRequest(BaseModel):
    """One priority entry of a profile update."""
    label: str
    # Weight between 0.0 and 1.0 (inclusive).
    weight: float = Field(default=0.5, ge=0.0, le=1.0)
    keywords: list[str] = Field(default_factory=list)
    description: Optional[str] = None
class ProfileUpdateRequest(BaseModel):
    """Request body for a profile update; fields left as None are not changed."""
    priorities: Optional[list[ProfilePriorityRequest]] = None
    exclusions: Optional[list[str]] = None
    policies: Optional[dict] = None
class ProfileResponse(BaseModel):
    """Response carrying a relevance profile."""
    id: str
    # Priorities serialized as plain dicts.
    priorities: list[dict]
    exclusions: list[str]
    policies: dict
    # Scoring counters taken from the profile.
    total_scored: int
    total_kept: int
    total_dropped: int
    accuracy_estimate: Optional[float]
# ============================================================================
# Endpoints
# ============================================================================
@router.post("/ingest", response_model=AlertIngestResponse)
async def ingest_alert(request: AlertIngestRequest):
    """
    Manually import a single alert.

    Useful for tests or for adding articles by hand. The alert is stored in
    the in-memory alert store under its own ID.
    """
    alert = AlertItem(
        title=request.title,
        url=request.url,
        snippet=request.snippet or "",
        topic_label=request.topic_label,
        published_at=request.published_at,
    )
    _alerts_store[alert.id] = alert

    # Show at most 50 characters of the title and add an ellipsis only when
    # the title was actually shortened.
    max_title_chars = 50
    if len(alert.title) > max_title_chars:
        shown_title = f"{alert.title[:max_title_chars]}..."
    else:
        shown_title = alert.title
    return AlertIngestResponse(
        id=alert.id,
        status="created",
        message=f"Alert '{shown_title}' importiert"
    )
def _priority_terms(priority) -> list[str]:
    """Collect the lowercase match terms (label plus keywords) of one profile priority.

    Accepts objects with ``label``/``keywords`` attributes as well as plain
    dicts; blank or non-string terms are dropped because an empty term would
    match every text.
    """
    if isinstance(priority, dict):
        label = priority.get("label")
        keywords = priority.get("keywords")
    else:
        label = getattr(priority, "label", None)
        keywords = getattr(priority, "keywords", None)
    candidates = [label, *(keywords or [])]
    return [c.lower() for c in candidates if isinstance(c, str) and c.strip()]


@router.post("/run", response_model=AlertRunResponse)
async def run_scoring_pipeline(request: AlertRunRequest):
    """
    Run the scoring pipeline on new alerts.

    Scores alerts with status NEW (and, if ``skip_scored`` is False, also
    alerts with status SCORED), up to ``limit`` alerts, and classifies them
    as KEEP, DROP or REVIEW.

    If ALERTS_USE_LLM is true and an API key is configured, scoring goes
    through the LLM gateway. Otherwise a fast keyword-based scoring against
    the profile's exclusions and priorities is used.
    """
    import time
    # Monotonic clock: the measured duration is not affected by clock changes.
    start = time.monotonic()

    # Select the alerts to score.
    alerts_to_score = [
        a for a in _alerts_store.values()
        if a.status == AlertStatus.NEW or (not request.skip_scored and a.status == AlertStatus.SCORED)
    ][:request.limit]
    if not alerts_to_score:
        return AlertRunResponse(
            processed=0, keep=0, drop=0, review=0, errors=0,
            duration_ms=int((time.monotonic() - start) * 1000)
        )

    keep = drop = review = errors = 0

    # Load the scoring profile, creating the default one on first use.
    profile = _profile_store.get("default")
    if not profile:
        profile = RelevanceProfile.create_default_education_profile()
        profile.id = "default"
        _profile_store["default"] = profile

    if ALERTS_USE_LLM and LLM_API_KEY:
        # LLM-based scoring through the gateway.
        scorer = RelevanceScorer(
            gateway_url=LLM_GATEWAY_URL,
            api_key=LLM_API_KEY,
            model="breakpilot-teacher-8b",
        )
        try:
            results = await scorer.score_batch(alerts_to_score, profile=profile)
            for result in results:
                if result.error:
                    errors += 1
                elif result.decision == RelevanceDecision.KEEP:
                    keep += 1
                elif result.decision == RelevanceDecision.DROP:
                    drop += 1
                else:
                    review += 1
        finally:
            await scorer.close()
    else:
        # Fallback: keyword-based scoring (fast, no LLM).
        # The profile terms do not change per alert, so prepare them once.
        exclusion_terms = [
            e.lower() for e in (profile.exclusions or [])
            if isinstance(e, str) and e.strip()
        ]
        priority_terms = [
            term for p in (profile.priorities or []) for term in _priority_terms(p)
        ]
        for alert in alerts_to_score:
            combined = f"{alert.title.lower()} {(alert.snippet or '').lower()}"
            if any(term in combined for term in exclusion_terms):
                # Exclusions win over priorities.
                alert.relevance_score = 0.15
                alert.relevance_decision = RelevanceDecision.DROP.value
                drop += 1
            elif any(term in combined for term in priority_terms):
                alert.relevance_score = 0.85
                alert.relevance_decision = RelevanceDecision.KEEP.value
                keep += 1
            else:
                alert.relevance_score = 0.55
                alert.relevance_decision = RelevanceDecision.REVIEW.value
                review += 1
            alert.status = AlertStatus.SCORED

    duration_ms = int((time.monotonic() - start) * 1000)
    return AlertRunResponse(
        processed=len(alerts_to_score),
        keep=keep,
        drop=drop,
        review=review,
        errors=errors,
        duration_ms=duration_ms,
    )
@router.get("/inbox", response_model=InboxResponse)
async def get_inbox(
    decision: Optional[str] = Query(default=None, description="Filter: KEEP, DROP, REVIEW"),
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=20, ge=1, le=100),
):
    """
    Return one page of inbox items.

    Items are filtered by relevance decision (case-insensitive); without a
    filter, KEEP and REVIEW items are shown. Results are ordered by
    relevance score, highest first.
    """
    # Decisions to show: the requested one, or KEEP and REVIEW by default.
    if decision:
        wanted = (decision.upper(),)
    else:
        wanted = ("KEEP", "REVIEW")

    # Filter, then order by score (descending); unscored alerts count as 0.
    matching = sorted(
        (a for a in _alerts_store.values() if a.relevance_decision in wanted),
        key=lambda a: a.relevance_score or 0,
        reverse=True,
    )

    # Pagination
    offset = (page - 1) * page_size
    items = []
    for entry in matching[offset:offset + page_size]:
        items.append(
            InboxItem(
                id=entry.id,
                title=entry.title,
                url=entry.url,
                snippet=entry.snippet,
                topic_label=entry.topic_label,
                published_at=entry.published_at,
                relevance_score=entry.relevance_score,
                relevance_decision=entry.relevance_decision,
                relevance_summary=entry.relevance_summary,
                status=entry.status.value,
            )
        )

    return InboxResponse(
        items=items,
        total=len(matching),
        page=page,
        page_size=page_size,
    )
@router.post("/feedback", response_model=FeedbackResponse)
async def submit_feedback(request: FeedbackRequest):
    """
    Record relevance feedback for an alert.

    Marks the alert as REVIEWED and passes the feedback to the "default"
    relevance profile, which is created on first use.

    Raises:
        HTTPException: 404 if the alert is not in the store.
    """
    target = _alerts_store.get(request.alert_id)
    if not target:
        raise HTTPException(status_code=404, detail="Alert nicht gefunden")

    # Update the alert status first.
    target.status = AlertStatus.REVIEWED

    # Feedback always goes to the default profile (demo setup).
    default_profile = _profile_store.get("default")
    if not default_profile:
        default_profile = RelevanceProfile.create_default_education_profile()
        default_profile.id = "default"
        _profile_store["default"] = default_profile

    feedback_reason = request.reason or ""
    default_profile.update_from_feedback(
        alert_title=target.title,
        alert_url=target.url,
        is_relevant=request.is_relevant,
        reason=feedback_reason,
    )

    return FeedbackResponse(
        success=True,
        message="Feedback gespeichert",
        profile_updated=True,
    )
@router.get("/profile", response_model=ProfileResponse)
async def get_profile(user_id: Optional[str] = Query(default=None)):
    """
    Return a relevance profile.

    Without ``user_id`` the "default" profile is returned. A profile that
    does not exist yet is created from the default education profile and
    stored under the requested ID.
    """
    profile_id = user_id or "default"
    profile = _profile_store.get(profile_id)
    if not profile:
        # First access: create and store the default profile.
        profile = RelevanceProfile.create_default_education_profile()
        profile.id = profile_id
        _profile_store[profile_id] = profile

    # Priorities may be PriorityItem objects or already plain values.
    serialized_priorities = []
    for entry in profile.priorities:
        if isinstance(entry, PriorityItem):
            serialized_priorities.append(entry.to_dict())
        else:
            serialized_priorities.append(entry)

    return ProfileResponse(
        id=profile.id,
        priorities=serialized_priorities,
        exclusions=profile.exclusions,
        policies=profile.policies,
        total_scored=profile.total_scored,
        total_kept=profile.total_kept,
        total_dropped=profile.total_dropped,
        accuracy_estimate=profile.accuracy_estimate,
    )
@router.put("/profile", response_model=ProfileResponse)
async def update_profile(
    request: ProfileUpdateRequest,
    user_id: Optional[str] = Query(default=None),
):
    """
    Update a relevance profile.

    Priorities, exclusions and policies are each replaced only when the
    request provides them. A profile that does not exist yet is created as
    an empty RelevanceProfile under the requested ID ("default" without
    ``user_id``).
    """
    profile_id = user_id or "default"
    profile = _profile_store.get(profile_id)
    if not profile:
        profile = RelevanceProfile()
        profile.id = profile_id

    # Apply the provided parts of the update.
    if request.priorities is not None:
        replaced = []
        for item in request.priorities:
            replaced.append(
                PriorityItem(
                    label=item.label,
                    weight=item.weight,
                    keywords=item.keywords,
                    description=item.description,
                )
            )
        profile.priorities = replaced
    if request.exclusions is not None:
        profile.exclusions = request.exclusions
    if request.policies is not None:
        profile.policies = request.policies

    profile.updated_at = datetime.utcnow()
    _profile_store[profile_id] = profile

    # Priorities may be PriorityItem objects or already plain values.
    serialized_priorities = []
    for entry in profile.priorities:
        if isinstance(entry, PriorityItem):
            serialized_priorities.append(entry.to_dict())
        else:
            serialized_priorities.append(entry)

    return ProfileResponse(
        id=profile.id,
        priorities=serialized_priorities,
        exclusions=profile.exclusions,
        policies=profile.policies,
        total_scored=profile.total_scored,
        total_kept=profile.total_kept,
        total_dropped=profile.total_dropped,
        accuracy_estimate=profile.accuracy_estimate,
    )
@router.get("/stats")
async def get_stats():
    """
    Return statistics about alerts and scoring.

    The result uses the format the frontend expects:
    - total_alerts, new_alerts, kept_alerts, review_alerts, dropped_alerts
    - total_topics, active_topics, total_rules
    plus avg_score and the by_status / by_decision breakdowns.

    Alert numbers come from the in-memory store. Topic and rule counts are
    read from the database when it is available and stay 0 otherwise.
    """
    import logging

    alerts = list(_alerts_store.values())
    total = len(alerts)

    # Count by status and by decision.
    new_alerts = sum(1 for a in alerts if a.status == AlertStatus.NEW)
    kept_alerts = sum(1 for a in alerts if a.relevance_decision == "KEEP")
    review_alerts = sum(1 for a in alerts if a.relevance_decision == "REVIEW")
    dropped_alerts = sum(1 for a in alerts if a.relevance_decision == "DROP")

    # The in-memory store has no topics or rules; these stay 0 unless the
    # database lookup below succeeds.
    total_topics = 0
    active_topics = 0
    total_rules = 0

    # Best effort: load topic and rule counts from the database.
    try:
        from alerts_agent.db import get_db
        from alerts_agent.db.repository import TopicRepository, RuleRepository

        db_gen = get_db()
        db = next(db_gen, None)
        if db:
            try:
                topic_repo = TopicRepository(db)
                rule_repo = RuleRepository(db)
                all_topics = topic_repo.get_all()
                total_topics = len(all_topics)
                active_topics = len([t for t in all_topics if t.is_active])
                all_rules = rule_repo.get_all()
                total_rules = len(all_rules)
            finally:
                # Resume the generator so the code after its yield runs.
                next(db_gen, None)
    except Exception:
        # Database not available: keep the in-memory defaults, but leave a
        # trace for debugging instead of swallowing the error silently.
        logging.getLogger(__name__).debug(
            "DB statistics unavailable, using in-memory defaults", exc_info=True
        )

    # Average score over all alerts that have a score.
    scored_alerts = [a for a in alerts if a.relevance_score is not None]
    avg_score = sum(a.relevance_score for a in scored_alerts) / len(scored_alerts) if scored_alerts else 0.0

    return {
        # Frontend-compatible format
        "total_alerts": total,
        "new_alerts": new_alerts,
        "kept_alerts": kept_alerts,
        "review_alerts": review_alerts,
        "dropped_alerts": dropped_alerts,
        "total_topics": total_topics,
        "active_topics": active_topics,
        "total_rules": total_rules,
        "avg_score": avg_score,
        # Additional details (backward compatibility)
        "by_status": {
            "new": new_alerts,
            "scored": sum(1 for a in alerts if a.status == AlertStatus.SCORED),
            "reviewed": sum(1 for a in alerts if a.status == AlertStatus.REVIEWED),
        },
        "by_decision": {
            "KEEP": kept_alerts,
            "REVIEW": review_alerts,
            "DROP": dropped_alerts,
        },
    }

View File

@@ -0,0 +1,473 @@
"""
Rules API Routes für Alerts Agent.
CRUD-Operationen für Alert-Regeln.
"""
from typing import List, Optional, Dict, Any
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from alerts_agent.db import get_db
from alerts_agent.db.repository import RuleRepository
from alerts_agent.db.models import RuleActionEnum
# Router for the rule endpoints, mounted under /rules and tagged "alerts".
router = APIRouter(prefix="/rules", tags=["alerts"])
# =============================================================================
# PYDANTIC MODELS
# =============================================================================
class RuleConditionModel(BaseModel):
    """Model for a single rule condition."""
    field: str = Field(..., description="Feld zum Prüfen (title, snippet, url, source, relevance_score)")
    # Accepted under the alias "op" as well as under its field name
    # (see populate_by_name below).
    operator: str = Field(..., alias="op", description="Operator (contains, not_contains, equals, regex, gt, lt, in)")
    value: Any = Field(..., description="Vergleichswert (String, Zahl, oder Liste)")

    class Config:
        populate_by_name = True
class RuleCreate(BaseModel):
    """Request model for creating a rule."""
    # Required, 1-255 characters.
    name: str = Field(..., min_length=1, max_length=255)
    description: str = Field(default="", max_length=2000)
    conditions: List[RuleConditionModel] = Field(default_factory=list)
    action_type: str = Field(default="keep", description="Aktion: keep, drop, tag, email, webhook, slack")
    action_config: Dict[str, Any] = Field(default_factory=dict)
    topic_id: Optional[str] = Field(default=None, description="Optional: Nur für bestimmtes Topic")
    # 0-1000; higher values are evaluated first.
    priority: int = Field(default=0, ge=0, le=1000, description="Priorität (höher = wird zuerst evaluiert)")
    is_active: bool = Field(default=True)
class RuleUpdate(BaseModel):
    """Request model for updating a rule; every field is optional."""
    # All fields default to None, which marks a field as "not provided".
    name: Optional[str] = Field(default=None, min_length=1, max_length=255)
    description: Optional[str] = Field(default=None, max_length=2000)
    conditions: Optional[List[RuleConditionModel]] = None
    action_type: Optional[str] = None
    action_config: Optional[Dict[str, Any]] = None
    topic_id: Optional[str] = None
    priority: Optional[int] = Field(default=None, ge=0, le=1000)
    is_active: Optional[bool] = None
class RuleResponse(BaseModel):
    """Response model for a rule."""
    id: str
    name: str
    description: str
    # Conditions as plain dicts.
    conditions: List[Dict[str, Any]]
    action_type: str
    action_config: Dict[str, Any]
    topic_id: Optional[str]
    priority: int
    is_active: bool
    # Match statistics of the rule.
    match_count: int
    last_matched_at: Optional[datetime]
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allows building the model from attribute-bearing objects.
        from_attributes = True
class RuleListResponse(BaseModel):
    """Response model for a list of rules."""
    rules: List[RuleResponse]
    # Number of rules in the response.
    total: int
class RuleTestRequest(BaseModel):
    """Request model for testing rules; describes the sample alert to test against."""
    # Every field has a default, so an empty request body is valid.
    title: str = Field(default="Test Title")
    snippet: str = Field(default="Test snippet content")
    url: str = Field(default="https://example.com/test")
    source: str = Field(default="rss_feed")
    relevance_score: Optional[float] = Field(default=None)
class RuleTestResponse(BaseModel):
    """Response model for a rule test."""
    rule_id: str
    rule_name: str
    # Whether the rule matched the sample alert.
    matched: bool
    action: str
    conditions_met: List[str]
# =============================================================================
# API ENDPOINTS
# =============================================================================
@router.post("", response_model=RuleResponse, status_code=201)
async def create_rule(
    rule: RuleCreate,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Create a new rule.

    Rules are evaluated by priority; a higher priority is checked first.
    A rule requested as inactive is created first and then deactivated.
    """
    repo = RuleRepository(db)

    # Convert the condition models to plain dicts.
    condition_dicts = []
    for cond in rule.conditions:
        condition_dicts.append({"field": cond.field, "op": cond.operator, "value": cond.value})

    created = repo.create(
        name=rule.name,
        description=rule.description,
        conditions=condition_dicts,
        action_type=rule.action_type,
        action_config=rule.action_config,
        topic_id=rule.topic_id,
        priority=rule.priority,
    )
    if rule.is_active:
        return _to_rule_response(created)

    # Deactivate and re-read so the response reflects the stored state.
    repo.update(created.id, is_active=False)
    return _to_rule_response(repo.get_by_id(created.id))
@router.get("", response_model=RuleListResponse)
async def list_rules(
    is_active: Optional[bool] = None,
    topic_id: Optional[str] = None,
    db: DBSession = Depends(get_db),
) -> RuleListResponse:
    """
    List rules.

    Args:
        is_active: True returns only active rules, False only inactive
            rules; when omitted, all rules are returned.
        topic_id: when given, returns the rules of that topic plus the
            rules that are not bound to any topic.
    """
    repo = RuleRepository(db)
    if is_active is True:
        rules = repo.get_active()
    else:
        rules = repo.get_all()
        if is_active is False:
            # An explicit False must filter as well, not behave like "no filter".
            rules = [r for r in rules if not r.is_active]

    # Topic filter: rules without a topic apply to every topic.
    if topic_id:
        rules = [r for r in rules if r.topic_id == topic_id or r.topic_id is None]

    return RuleListResponse(
        rules=[_to_rule_response(r) for r in rules],
        total=len(rules),
    )
@router.get("/{rule_id}", response_model=RuleResponse)
async def get_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Fetch a rule by its ID.

    Raises:
        HTTPException: 404 if no rule with this ID exists.
    """
    found = RuleRepository(db).get_by_id(rule_id)
    if found:
        return _to_rule_response(found)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.put("/{rule_id}", response_model=RuleResponse)
async def update_rule(
    rule_id: str,
    updates: RuleUpdate,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Update a rule.

    Only fields whose value is not None are written.

    Raises:
        HTTPException: 400 if the request contains no updates,
            404 if the rule does not exist.
    """
    repo = RuleRepository(db)

    # Collect the provided values; fields left at None are skipped.
    changes = {}
    for attr in ("name", "description"):
        provided = getattr(updates, attr)
        if provided is not None:
            changes[attr] = provided
    if updates.conditions is not None:
        # Conditions are stored as plain dicts.
        changes["conditions"] = [
            {"field": cond.field, "op": cond.operator, "value": cond.value}
            for cond in updates.conditions
        ]
    for attr in ("action_type", "action_config", "topic_id", "priority", "is_active"):
        provided = getattr(updates, attr)
        if provided is not None:
            changes[attr] = provided

    if not changes:
        raise HTTPException(status_code=400, detail="Keine Updates angegeben")

    updated = repo.update(rule_id, **changes)
    if updated:
        return _to_rule_response(updated)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.delete("/{rule_id}", status_code=204)
async def delete_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
):
    """
    Delete a rule.

    Raises:
        HTTPException: 404 if the repository reports that nothing was deleted.
    """
    deleted = RuleRepository(db).delete(rule_id)
    if deleted:
        return None
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/activate", response_model=RuleResponse)
async def activate_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Activate a rule.

    Raises:
        HTTPException: 404 if the rule does not exist.
    """
    activated = RuleRepository(db).update(rule_id, is_active=True)
    if activated:
        return _to_rule_response(activated)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/deactivate", response_model=RuleResponse)
async def deactivate_rule(
    rule_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Deactivate a rule.

    Raises:
        HTTPException: 404 if the rule does not exist.
    """
    deactivated = RuleRepository(db).update(rule_id, is_active=False)
    if deactivated:
        return _to_rule_response(deactivated)
    raise HTTPException(status_code=404, detail="Regel nicht gefunden")
@router.post("/{rule_id}/test", response_model=RuleTestResponse)
async def test_rule(
    rule_id: str,
    test_data: RuleTestRequest,
    db: DBSession = Depends(get_db),
) -> RuleTestResponse:
    """
    Test a rule against sample data.

    Useful for checking a rule before activating it. The sample data is
    turned into an unsaved mock alert and evaluated against the rule.

    Raises:
        HTTPException: 404 if the rule does not exist,
            400 if ``source`` is not a known alert source.
    """
    from alerts_agent.processing.rule_engine import evaluate_rule
    from alerts_agent.db.models import AlertItemDB, AlertSourceEnum, AlertStatusEnum

    repo = RuleRepository(db)
    rule = repo.get_by_id(rule_id)
    if not rule:
        raise HTTPException(status_code=404, detail="Regel nicht gefunden")

    # An unknown source is a client error, not an internal server error.
    try:
        source = AlertSourceEnum(test_data.source) if test_data.source else AlertSourceEnum.RSS_FEED
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Quelle: {test_data.source}",
        ) from exc

    # Build the mock alert for the test.
    mock_alert = AlertItemDB(
        id="test-alert",
        topic_id="test-topic",
        title=test_data.title,
        snippet=test_data.snippet,
        url=test_data.url,
        url_hash="test-hash",
        source=source,
        status=AlertStatusEnum.NEW,
        relevance_score=test_data.relevance_score,
    )

    # Evaluate the rule.
    match = evaluate_rule(mock_alert, rule)
    return RuleTestResponse(
        rule_id=match.rule_id,
        rule_name=match.rule_name,
        matched=match.matched,
        action=match.action.value,
        conditions_met=match.conditions_met,
    )
@router.post("/test-all", response_model=List[RuleTestResponse])
async def test_all_rules(
    test_data: RuleTestRequest,
    db: DBSession = Depends(get_db),
) -> List[RuleTestResponse]:
    """
    Test all active rules against sample data.

    Returns one result per active rule, showing which rules would match.

    Raises:
        HTTPException: 400 if ``source`` is not a known alert source.
    """
    from alerts_agent.processing.rule_engine import evaluate_rule
    from alerts_agent.db.models import AlertItemDB, AlertSourceEnum, AlertStatusEnum

    repo = RuleRepository(db)
    rules = repo.get_active()

    # An unknown source is a client error, not an internal server error.
    try:
        source = AlertSourceEnum(test_data.source) if test_data.source else AlertSourceEnum.RSS_FEED
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Quelle: {test_data.source}",
        ) from exc

    # Build the mock alert for the test.
    mock_alert = AlertItemDB(
        id="test-alert",
        topic_id="test-topic",
        title=test_data.title,
        snippet=test_data.snippet,
        url=test_data.url,
        url_hash="test-hash",
        source=source,
        status=AlertStatusEnum.NEW,
        relevance_score=test_data.relevance_score,
    )

    results = []
    for rule in rules:
        match = evaluate_rule(mock_alert, rule)
        results.append(RuleTestResponse(
            rule_id=match.rule_id,
            rule_name=match.rule_name,
            matched=match.matched,
            action=match.action.value,
            conditions_met=match.conditions_met,
        ))
    return results
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def _to_rule_response(rule) -> RuleResponse:
    """Convert a rule DB object into a RuleResponse.

    Missing description, conditions, action config and action type are
    replaced by "", [], {} and "keep" respectively.
    """
    if rule.action_type:
        action_type = rule.action_type.value
    else:
        action_type = "keep"

    fields = {
        "id": rule.id,
        "name": rule.name,
        "description": rule.description or "",
        "conditions": rule.conditions or [],
        "action_type": action_type,
        "action_config": rule.action_config or {},
        "topic_id": rule.topic_id,
        "priority": rule.priority,
        "is_active": rule.is_active,
        "match_count": rule.match_count,
        "last_matched_at": rule.last_matched_at,
        "created_at": rule.created_at,
        "updated_at": rule.updated_at,
    }
    return RuleResponse(**fields)
# =============================================================================
# PRESET RULES
# =============================================================================
# Rule templates, keyed by preset ID. Each entry holds the arguments used to
# create the rule: name, description, conditions, action_type and priority.
# The two "exclude_*" presets drop alerts at priority 100; the two "keep_*"
# presets keep alerts at priority 50.
# NOTE(review): every preset uses the "in" operator with a list of terms on
# the title field; whether the rule engine treats this as substring matching
# or as exact membership is not visible here - confirm against rule_engine.
PRESET_RULES = {
    "exclude_jobs": {
        "name": "Stellenanzeigen ausschließen",
        "description": "Filtert Stellenanzeigen und Job-Postings",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Stellenanzeige", "Job", "Karriere", "Praktikum", "Werkstudent", "Ausbildung", "Referendariat"]}
        ],
        "action_type": "drop",
        "priority": 100,
    },
    "exclude_ads": {
        "name": "Werbung ausschließen",
        "description": "Filtert Werbung und Pressemitteilungen",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Werbung", "Anzeige", "Pressemitteilung", "PR:", "Sponsored"]}
        ],
        "action_type": "drop",
        "priority": 100,
    },
    "keep_inklusion": {
        "name": "Inklusion behalten",
        "description": "Behält Artikel zum Thema Inklusion",
        "conditions": [
            {"field": "title", "op": "in", "value": ["Inklusion", "inklusiv", "Förderbedarf", "Förderschule", "Nachteilsausgleich"]}
        ],
        "action_type": "keep",
        "priority": 50,
    },
    "keep_datenschutz": {
        "name": "Datenschutz behalten",
        "description": "Behält Artikel zum Thema Datenschutz in Schulen",
        "conditions": [
            {"field": "title", "op": "in", "value": ["DSGVO", "Datenschutz", "Schülerfotos", "personenbezogen"]}
        ],
        "action_type": "keep",
        "priority": 50,
    },
}
@router.get("/presets/list")
async def list_preset_rules() -> Dict[str, Any]:
    """
    List the available rule presets.

    Each preset is returned with its ID merged into its definition.
    """
    presets = []
    for preset_id, definition in PRESET_RULES.items():
        presets.append({"id": preset_id, **definition})
    return {"presets": presets}
@router.post("/presets/{preset_id}/apply", response_model=RuleResponse)
async def apply_preset_rule(
    preset_id: str,
    db: DBSession = Depends(get_db),
) -> RuleResponse:
    """
    Apply a rule preset, i.e. create a rule from it.

    Raises:
        HTTPException: 404 if the preset ID is unknown.
    """
    preset = PRESET_RULES.get(preset_id)
    if preset is None:
        raise HTTPException(status_code=404, detail="Preset nicht gefunden")

    new_rule = RuleRepository(db).create(
        name=preset["name"],
        description=preset.get("description", ""),
        conditions=preset["conditions"],
        action_type=preset["action_type"],
        priority=preset.get("priority", 0),
    )
    return _to_rule_response(new_rule)

View File

@@ -0,0 +1,421 @@
"""
API Routes für User Alert Subscriptions.
Verwaltet Nutzer-Abonnements für Templates und Digest-Einstellungen.
Endpoints:
- POST /subscriptions - Neue Subscription erstellen
- GET /subscriptions - User-Subscriptions auflisten
- GET /subscriptions/{id} - Subscription-Details
- PUT /subscriptions/{id} - Subscription aktualisieren
- DELETE /subscriptions/{id} - Subscription deaktivieren
- POST /subscriptions/{id}/activate-template - Template aktivieren
"""
import uuid
from typing import Optional, List
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
UserAlertSubscriptionDB, AlertTemplateDB, AlertProfileDB,
AlertTopicDB, AlertRuleDB, AlertModeEnum, UserRoleEnum
)
# Router for the subscription endpoints, mounted under /subscriptions.
router = APIRouter(prefix="/subscriptions", tags=["subscriptions"])
# ============================================================================
# Request/Response Models
# ============================================================================
class SubscriptionCreate(BaseModel):
    """Request body for creating a new subscription."""
    # Plain string here; the create endpoint converts it to AlertModeEnum.
    mode: str = Field(default="guided", description="'guided' oder 'expert'")
    # Optional; the create endpoint converts it to UserRoleEnum when given.
    user_role: Optional[str] = Field(default=None, description="lehrkraft, schulleitung, it_beauftragte")
    # The create endpoint rejects more than three IDs.
    template_ids: List[str] = Field(default=[], description="Ausgewählte Template-IDs (max. 3)")
    notification_email: Optional[str] = Field(default=None)
    digest_enabled: bool = Field(default=True)
    digest_frequency: str = Field(default="weekly")
class SubscriptionUpdate(BaseModel):
    """Request body for updating a subscription.

    Every field is optional; the update endpoint only applies fields whose
    value is not ``None``.
    """
    template_ids: Optional[List[str]] = None
    notification_email: Optional[str] = None
    digest_enabled: Optional[bool] = None
    digest_frequency: Optional[str] = None
    is_active: Optional[bool] = None
class SubscriptionResponse(BaseModel):
    """Response payload describing a single subscription."""
    id: str
    user_id: str
    mode: str
    user_role: Optional[str]
    selected_template_ids: List[str]
    # Display names resolved from selected_template_ids.
    template_names: List[str]
    notification_email: Optional[str]
    digest_enabled: bool
    digest_frequency: str
    wizard_completed: bool
    is_active: bool
    created_at: datetime
    updated_at: datetime
class SubscriptionListResponse(BaseModel):
    """Response payload for a list of subscriptions."""
    subscriptions: List[SubscriptionResponse]
    # Number of entries in `subscriptions`.
    total: int
class ActivateTemplateRequest(BaseModel):
    """Request body for template activation; both switches default to on."""
    create_topics: bool = Field(default=True, description="Topics aus Template-Config erstellen")
    create_rules: bool = Field(default=True, description="Rules aus Template-Config erstellen")
class ActivateTemplateResponse(BaseModel):
    """Response payload summarising a template activation."""
    status: str
    # Number of topic / rule rows added during activation.
    topics_created: int
    rules_created: int
    # True when at least one template carried a profile_config.
    profile_updated: bool
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """
    Return the ID of the user making the current request.

    TODO: evaluate the JWT token; for now a fixed dummy ID is returned.
    """
    placeholder_user_id = "demo-user"
    return placeholder_user_id
def _get_template_names(db: DBSession, template_ids: List[str]) -> List[str]:
    """
    Resolve template IDs to template names.

    The names are returned in the order of ``template_ids`` so they line up
    with the ID list shown next to them; the underlying query has no ORDER BY
    and would otherwise yield an arbitrary order. IDs without a matching
    template are skipped.

    Args:
        db: Database session used for the lookup.
        template_ids: Template IDs to resolve; may be empty.

    Returns:
        The names of the templates that exist, ordered like ``template_ids``.
    """
    if not template_ids:
        return []

    rows = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id.in_(template_ids)
    ).all()
    names_by_id = {row.id: row.name for row in rows}
    return [names_by_id[tid] for tid in template_ids if tid in names_by_id]
def _subscription_to_response(sub: UserAlertSubscriptionDB, db: DBSession) -> SubscriptionResponse:
    """
    Convert a subscription DB row into its API response model.

    Unset columns are replaced by defaults: mode "guided", digest enabled,
    frequency "weekly", wizard not completed, subscription active.
    """
    selected_ids = sub.selected_template_ids or []

    mode_value = "guided"
    if sub.mode:
        mode_value = sub.mode.value

    role_value = None
    if sub.user_role:
        role_value = sub.user_role.value

    # Boolean columns: only None falls back to the default, False is kept.
    digest_enabled = True if sub.digest_enabled is None else sub.digest_enabled
    wizard_completed = False if sub.wizard_completed is None else sub.wizard_completed
    is_active = True if sub.is_active is None else sub.is_active

    return SubscriptionResponse(
        id=sub.id,
        user_id=sub.user_id,
        mode=mode_value,
        user_role=role_value,
        selected_template_ids=selected_ids,
        template_names=_get_template_names(db, selected_ids),
        notification_email=sub.notification_email,
        digest_enabled=digest_enabled,
        digest_frequency=sub.digest_frequency or "weekly",
        wizard_completed=wizard_completed,
        is_active=is_active,
        created_at=sub.created_at,
        updated_at=sub.updated_at,
    )
# ============================================================================
# Endpoints
# ============================================================================
@router.post("", response_model=SubscriptionResponse)
async def create_subscription(
    request: SubscriptionCreate,
    db: DBSession = Depends(get_db)
):
    """
    Create a new alert subscription for the current user.

    In guided mode 1-3 templates are selected; in expert mode a custom
    profile is configured.

    Raises:
        HTTPException: 400 for an unknown mode or role, for more than three
            distinct template IDs, or if any template ID does not exist.
    """
    user_id = get_user_id_from_request()

    # Validate mode
    try:
        mode = AlertModeEnum(request.mode)
    except ValueError:
        raise HTTPException(status_code=400, detail="Ungültiger Modus. Erlaubt: 'guided', 'expert'")

    # Validate role (optional)
    user_role = None
    if request.user_role:
        try:
            user_role = UserRoleEnum(request.user_role)
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail="Ungültige Rolle. Erlaubt: 'lehrkraft', 'schulleitung', 'it_beauftragte'"
            )

    # Drop repeated IDs (first occurrence wins). Without this, the COUNT
    # below is smaller than the list length and valid IDs would be rejected
    # as "invalid".
    template_ids = list(dict.fromkeys(request.template_ids))
    if template_ids:
        if len(template_ids) > 3:
            raise HTTPException(status_code=400, detail="Maximal 3 Templates erlaubt")

        # Check that every template exists
        existing = db.query(AlertTemplateDB).filter(
            AlertTemplateDB.id.in_(template_ids)
        ).count()
        if existing != len(template_ids):
            raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")

    subscription = UserAlertSubscriptionDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        mode=mode,
        user_role=user_role,
        selected_template_ids=template_ids,
        notification_email=request.notification_email,
        digest_enabled=request.digest_enabled,
        digest_frequency=request.digest_frequency,
        wizard_completed=bool(template_ids),  # completed once templates are chosen
        is_active=True,
    )
    db.add(subscription)
    db.commit()
    db.refresh(subscription)

    return _subscription_to_response(subscription, db)
@router.get("", response_model=SubscriptionListResponse)
async def list_subscriptions(
    active_only: bool = Query(True, description="Nur aktive Subscriptions"),
    db: DBSession = Depends(get_db)
):
    """List the current user's subscriptions, newest first."""
    user_id = get_user_id_from_request()

    criteria = [UserAlertSubscriptionDB.user_id == user_id]
    if active_only:
        # SQL expression, not a Python comparison - must stay `== True`.
        criteria.append(UserAlertSubscriptionDB.is_active == True)  # noqa: E712

    rows = (
        db.query(UserAlertSubscriptionDB)
        .filter(*criteria)
        .order_by(UserAlertSubscriptionDB.created_at.desc())
        .all()
    )

    items = []
    for row in rows:
        items.append(_subscription_to_response(row, db))
    return SubscriptionListResponse(subscriptions=items, total=len(rows))
@router.get("/{subscription_id}", response_model=SubscriptionResponse)
async def get_subscription(
    subscription_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Return the details of one subscription of the current user.

    Raises:
        HTTPException: 404 if the user has no subscription with this ID.
    """
    owner_id = get_user_id_from_request()

    match = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.id == subscription_id,
            UserAlertSubscriptionDB.user_id == owner_id,
        )
        .first()
    )
    if match:
        return _subscription_to_response(match, db)

    raise HTTPException(status_code=404, detail="Subscription nicht gefunden")
@router.put("/{subscription_id}", response_model=SubscriptionResponse)
async def update_subscription(
    subscription_id: str,
    request: SubscriptionUpdate,
    db: DBSession = Depends(get_db)
):
    """
    Update a subscription of the current user.

    Only fields that are not ``None`` in the request are applied. Template
    IDs are validated the same way as on creation (at most three, all must
    exist); an empty list clears the selection.

    Raises:
        HTTPException: 404 if the subscription does not exist for this user;
            400 for more than three distinct template IDs or unknown IDs.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == user_id
    ).first()
    if not subscription:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")

    if request.template_ids is not None:
        # Drop repeated IDs so the existence check compares like with like.
        template_ids = list(dict.fromkeys(request.template_ids))
        if len(template_ids) > 3:
            raise HTTPException(status_code=400, detail="Maximal 3 Templates erlaubt")
        if template_ids:
            existing = db.query(AlertTemplateDB).filter(
                AlertTemplateDB.id.in_(template_ids)
            ).count()
            if existing != len(template_ids):
                raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")
        subscription.selected_template_ids = template_ids

    if request.notification_email is not None:
        subscription.notification_email = request.notification_email
    if request.digest_enabled is not None:
        subscription.digest_enabled = request.digest_enabled
    if request.digest_frequency is not None:
        subscription.digest_frequency = request.digest_frequency
    if request.is_active is not None:
        subscription.is_active = request.is_active

    subscription.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(subscription)

    return _subscription_to_response(subscription, db)
@router.delete("/{subscription_id}")
async def deactivate_subscription(
    subscription_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Deactivate a subscription (soft delete: the row is kept, ``is_active``
    is set to False).

    Raises:
        HTTPException: 404 if the user has no subscription with this ID.
    """
    owner_id = get_user_id_from_request()

    target = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.id == subscription_id,
            UserAlertSubscriptionDB.user_id == owner_id,
        )
        .first()
    )
    if not target:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")

    target.is_active = False
    target.updated_at = datetime.utcnow()
    db.commit()

    result = {"status": "success", "message": "Subscription deaktiviert"}
    return result
def _merge_unique(existing: Optional[list], additions: Optional[list]) -> list:
    """Return a NEW list: ``existing`` followed by items of ``additions`` not yet present."""
    # A fresh list is required: SQLAlchemy does not notice in-place changes
    # to a plain JSON column value, and re-assigning the same mutated object
    # compares equal to the "old" value, so no UPDATE would be emitted.
    merged = list(existing or [])
    for item in additions or []:
        if item not in merged:
            merged.append(item)
    return merged


@router.post("/{subscription_id}/activate-template", response_model=ActivateTemplateResponse)
async def activate_template(
    subscription_id: str,
    request: ActivateTemplateRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Activate the templates selected for a subscription.

    For every selected template this:
    - creates topics from ``template.topics_config`` (unless disabled),
    - creates rules from ``template.rules_config`` (unless disabled),
    - merges ``template.profile_config`` priorities/exclusions into the
      user's profile, creating the profile if it does not exist yet.

    A missing request body is treated as "create topics and rules".

    Raises:
        HTTPException: 404 if the subscription does not exist for this user;
            400 if the subscription has no templates selected.
    """
    user_id = get_user_id_from_request()

    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.id == subscription_id,
        UserAlertSubscriptionDB.user_id == user_id
    ).first()
    if not subscription:
        raise HTTPException(status_code=404, detail="Subscription nicht gefunden")
    if not subscription.selected_template_ids:
        raise HTTPException(status_code=400, detail="Keine Templates ausgewählt")

    templates = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id.in_(subscription.selected_template_ids)
    ).all()

    create_topics = request is None or request.create_topics
    create_rules = request is None or request.create_rules

    topics_created = 0
    rules_created = 0
    profile_updated = False
    # Loaded (or created) once on first use. Re-querying per template could
    # miss a just-added, unflushed profile and create a second one.
    profile = None

    for template in templates:
        if create_topics:
            for topic_config in (template.topics_config or []):
                db.add(AlertTopicDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=topic_config.get("name", f"Topic from {template.name}"),
                    description=f"Automatisch erstellt aus Template: {template.name}",
                    is_active=True,
                    fetch_interval_minutes=60,
                ))
                topics_created += 1

        if create_rules:
            for rule_config in (template.rules_config or []):
                db.add(AlertRuleDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=rule_config.get("name", f"Rule from {template.name}"),
                    description=f"Automatisch erstellt aus Template: {template.name}",
                    conditions=rule_config.get("conditions", []),
                    action_type=rule_config.get("action_type", "keep"),
                    action_config=rule_config.get("action_config", {}),
                    priority=rule_config.get("priority", 50),
                    is_active=True,
                ))
                rules_created += 1

        profile_config = template.profile_config
        if not profile_config:
            continue

        if profile is None:
            profile = db.query(AlertProfileDB).filter(
                AlertProfileDB.user_id == user_id
            ).first()
            if not profile:
                profile = AlertProfileDB(
                    id=str(uuid.uuid4()),
                    user_id=user_id,
                    name=f"Profil für {user_id}",
                )
                db.add(profile)

        profile.priorities = _merge_unique(
            profile.priorities, profile_config.get("priorities", [])
        )
        profile.exclusions = _merge_unique(
            profile.exclusions, profile_config.get("exclusions", [])
        )
        profile_updated = True

    # Mark the wizard as completed
    subscription.wizard_completed = True
    subscription.updated_at = datetime.utcnow()
    db.commit()

    return ActivateTemplateResponse(
        status="success",
        topics_created=topics_created,
        rules_created=rules_created,
        profile_updated=profile_updated,
        message=f"Templates aktiviert: {topics_created} Topics, {rules_created} Rules erstellt."
    )

View File

@@ -0,0 +1,410 @@
"""
API routes for alert templates (playbooks).
Endpoints for guided mode:
- GET /templates - List all available templates
- GET /templates/{template_id} - Template details (ID or slug)
- POST /templates/{template_id}/preview - Generate a preview
- GET /templates/by-role/{role} - Templates for a role
- POST /templates/seed - Insert the predefined templates (development/setup only)
"""
from typing import Optional, List
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import AlertTemplateDB, UserRoleEnum
router = APIRouter(prefix="/templates", tags=["templates"])
# ============================================================================
# Request/Response Models
# ============================================================================
class TemplateListItem(BaseModel):
    """Short summary of a template as used in template lists."""
    id: str
    slug: str
    name: str
    description: str
    icon: str
    category: str
    # Role identifiers this template is aimed at.
    target_roles: List[str]
    is_premium: bool
    max_cards_per_day: int
    sort_order: int

    class Config:
        from_attributes = True
class TemplateDetail(BaseModel):
    """Full template details, including all configuration sections."""
    id: str
    slug: str
    name: str
    description: str
    icon: str
    category: str
    target_roles: List[str]
    # Configuration sections copied from the template row.
    topics_config: List[dict]
    rules_config: List[dict]
    profile_config: dict
    importance_config: dict
    max_cards_per_day: int
    digest_enabled: bool
    digest_day: str
    is_premium: bool
    is_active: bool

    class Config:
        from_attributes = True
class TemplateListResponse(BaseModel):
    """Response payload for a list of templates."""
    templates: List[TemplateListItem]
    # Number of entries in `templates`.
    total: int
class PreviewRequest(BaseModel):
    """Request body for a template preview."""
    # Number of sample items to return; validated to the range 1..5.
    sample_count: int = Field(default=3, ge=1, le=5)
class PreviewItem(BaseModel):
    """A single sample alert shown in a template preview."""
    title: str
    snippet: str
    # Importance label, e.g. "INFO", "WICHTIG", "DRINGEND" in the built-in samples.
    importance_level: str
    # Short explanation of why the alert matters to the user.
    why_relevant: str
    source_name: str
class PreviewResponse(BaseModel):
    """Response payload for a template preview."""
    template_name: str
    sample_items: List[PreviewItem]
    # Human-readable estimate (a formatted sentence, not a number).
    estimated_daily_count: str
    message: str
# ============================================================================
# Endpoints
# ============================================================================
@router.get("", response_model=TemplateListResponse)
async def list_templates(
    category: Optional[str] = Query(None, description="Filter nach Kategorie"),
    role: Optional[str] = Query(None, description="Filter nach Zielrolle"),
    include_premium: bool = Query(True, description="Premium-Templates einschließen"),
    db: DBSession = Depends(get_db)
):
    """
    List all available (active) alert templates.

    Templates are preconfigured playbooks for specific subjects
    (funding programmes, data protection, IT security, etc.).
    Results are ordered by ``sort_order``.
    """
    # SQL expressions, not Python comparisons - `== True/False` is intended.
    criteria = [AlertTemplateDB.is_active == True]  # noqa: E712
    if category:
        criteria.append(AlertTemplateDB.category == category)
    if not include_premium:
        criteria.append(AlertTemplateDB.is_premium == False)  # noqa: E712

    rows = (
        db.query(AlertTemplateDB)
        .filter(*criteria)
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )

    # target_roles is a JSON field, so the role filter runs in Python.
    if role:
        rows = [row for row in rows if role in (row.target_roles or [])]

    items = []
    for row in rows:
        items.append(
            TemplateListItem(
                id=row.id,
                slug=row.slug,
                name=row.name,
                description=row.description,
                icon=row.icon or "",
                category=row.category or "",
                target_roles=row.target_roles or [],
                is_premium=row.is_premium or False,
                max_cards_per_day=row.max_cards_per_day or 10,
                sort_order=row.sort_order or 0,
            )
        )
    return TemplateListResponse(templates=items, total=len(rows))
@router.get("/by-role/{role}", response_model=TemplateListResponse)
async def get_templates_by_role(
    role: str,
    db: DBSession = Depends(get_db)
):
    """
    Recommended templates for a given role.

    Roles:
    - lehrkraft: focus on teaching, training, competitions
    - schulleitung: focus on administration, funding, law
    - it_beauftragte: focus on IT security, data protection

    Only active, non-premium templates are recommended.

    Raises:
        HTTPException: 400 if ``role`` is not one of the roles above.
    """
    allowed_roles = ("lehrkraft", "schulleitung", "it_beauftragte")
    if role not in allowed_roles:
        raise HTTPException(
            status_code=400,
            detail=f"Ungültige Rolle. Erlaubt: {', '.join(allowed_roles)}"
        )

    candidates = (
        db.query(AlertTemplateDB)
        .filter(
            AlertTemplateDB.is_active == True,  # noqa: E712
            AlertTemplateDB.is_premium == False,  # noqa: E712 - free templates only
        )
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )

    items = []
    for row in candidates:
        if role not in (row.target_roles or []):
            continue
        items.append(
            TemplateListItem(
                id=row.id,
                slug=row.slug,
                name=row.name,
                description=row.description,
                icon=row.icon or "",
                category=row.category or "",
                target_roles=row.target_roles or [],
                is_premium=row.is_premium or False,
                max_cards_per_day=row.max_cards_per_day or 10,
                sort_order=row.sort_order or 0,
            )
        )
    return TemplateListResponse(templates=items, total=len(items))
@router.get("/{template_id}", response_model=TemplateDetail)
async def get_template(
    template_id: str,
    db: DBSession = Depends(get_db)
):
    """
    Return the full details of a template.

    ``template_id`` is matched against the template ID first and, if nothing
    is found, against the slug. The response contains all configuration
    sections (topics, rules, profile).

    Raises:
        HTTPException: 404 if neither lookup finds a template.
    """
    found = None
    for column in (AlertTemplateDB.id, AlertTemplateDB.slug):
        found = db.query(AlertTemplateDB).filter(column == template_id).first()
        if found:
            break
    if not found:
        raise HTTPException(status_code=404, detail="Template nicht gefunden")

    # Boolean columns: only None falls back to the default.
    digest_enabled = True if found.digest_enabled is None else found.digest_enabled
    is_active = True if found.is_active is None else found.is_active

    return TemplateDetail(
        id=found.id,
        slug=found.slug,
        name=found.name,
        description=found.description,
        icon=found.icon or "",
        category=found.category or "",
        target_roles=found.target_roles or [],
        topics_config=found.topics_config or [],
        rules_config=found.rules_config or [],
        profile_config=found.profile_config or {},
        importance_config=found.importance_config or {},
        max_cards_per_day=found.max_cards_per_day or 10,
        digest_enabled=digest_enabled,
        digest_day=found.digest_day or "monday",
        is_premium=found.is_premium or False,
        is_active=is_active,
    )
@router.post("/{template_id}/preview", response_model=PreviewResponse)
async def preview_template(
    template_id: str,
    request: PreviewRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Generate a preview of what alerts for this template would look like.

    Returns sample alerts with importance levels and "why relevant?"
    explanations. ``template_id`` is matched against the ID first, then
    against the slug. Without a request body three samples are returned.

    Raises:
        HTTPException: 404 if no template matches.
    """
    template = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id == template_id
    ).first()
    if not template:
        template = db.query(AlertTemplateDB).filter(
            AlertTemplateDB.slug == template_id
        ).first()
    if not template:
        raise HTTPException(status_code=404, detail="Template nicht gefunden")

    sample_items = _generate_preview_items(template)
    sample_count = request.sample_count if request else 3

    # Same fallback as the detail/list endpoints; avoids "Ca. None Meldungen"
    # when the column is unset.
    daily_cards = template.max_cards_per_day or 10

    return PreviewResponse(
        template_name=template.name,
        sample_items=sample_items[:sample_count],
        estimated_daily_count=f"Ca. {daily_cards} Meldungen pro Tag",
        message=f"Diese Vorschau zeigt, wie Alerts für '{template.name}' aussehen würden."
    )
@router.post("/seed")
async def seed_templates(
    force_update: bool = Query(False, description="Bestehende Templates aktualisieren"),
    db: DBSession = Depends(get_db)
):
    """
    Insert the predefined templates into the database.

    Intended for development/setup only.
    """
    # Imported locally under another name because this endpoint shares the
    # seeder's name.
    from ..data.templates import seed_templates as do_seed

    seeded = do_seed(db, force_update=force_update)
    summary = f"{seeded} Templates wurden eingefügt/aktualisiert."
    return {
        "status": "success",
        "templates_created": seeded,
        "message": summary,
    }
# ============================================================================
# Helper Functions
# ============================================================================
def _generate_preview_items(template: AlertTemplateDB) -> List[PreviewItem]:
    """
    Generate sample alerts for a template preview.

    The samples are static/illustrative and not derived from real data.
    They are looked up by ``template.slug``; a template without a matching
    entry gets a single generic sample built from ``template.name``.
    """
    # Template-specific samples, keyed by template slug
    examples = {
        "foerderprogramme": [
            PreviewItem(
                title="DigitalPakt 2.0: Neue Antragsphase startet am 1. April",
                snippet="Das BMBF hat die zweite Phase des DigitalPakt Schule angekündigt...",
                importance_level="DRINGEND",
                why_relevant="Frist endet in 45 Tagen. Betrifft alle Schulen mit Förderbedarf.",
                source_name="Bundesministerium für Bildung"
            ),
            PreviewItem(
                title="Landesförderung: 50.000€ für innovative Schulprojekte",
                snippet="Das Kultusministerium fördert Schulen, die digitale Lernkonzepte...",
                importance_level="WICHTIG",
                why_relevant="Passende Förderung für Ihr Bundesland. Keine Eigenbeteiligung erforderlich.",
                source_name="Kultusministerium"
            ),
            PreviewItem(
                title="Erasmus+ Schulpartnerschaften: Jetzt bewerben",
                snippet="Für das Schuljahr 2026/27 können Schulen EU-Förderung beantragen...",
                importance_level="PRUEFEN",
                why_relevant="EU-Programm mit hoher Fördersumme. Bewerbungsfrist in 3 Monaten.",
                source_name="EU-Kommission"
            ),
        ],
        "abitur-updates": [
            PreviewItem(
                title="Neue Operatoren für Abitur Deutsch ab 2027",
                snippet="Die KMK hat überarbeitete Operatoren für das Fach Deutsch beschlossen...",
                importance_level="WICHTIG",
                why_relevant="Betrifft die Oberstufenplanung. Anpassung der Klausuren erforderlich.",
                source_name="KMK"
            ),
            PreviewItem(
                title="Abiturtermine 2026: Prüfungsplan veröffentlicht",
                snippet="Das Kultusministerium hat die Termine für das Abitur 2026 bekannt gegeben...",
                importance_level="INFO",
                why_relevant="Planungsgrundlage für Schuljahreskalender.",
                source_name="Kultusministerium"
            ),
        ],
        "datenschutz-recht": [
            PreviewItem(
                title="LfDI: Neue Handreichung zu Schülerfotos",
                snippet="Der Landesbeauftragte für Datenschutz hat eine aktualisierte...",
                importance_level="DRINGEND",
                why_relevant="Handlungsbedarf: Bestehende Einwilligungen müssen geprüft werden.",
                source_name="Datenschutzbeauftragter"
            ),
            PreviewItem(
                title="Microsoft 365 an Schulen: Neue Bewertung",
                snippet="Die Datenschutzkonferenz hat ihre Position zu Microsoft 365 aktualisiert...",
                importance_level="WICHTIG",
                why_relevant="Betrifft Schulen mit Microsoft-Lizenzen. Dokumentationspflicht.",
                source_name="DSK"
            ),
        ],
        "it-security": [
            PreviewItem(
                title="CVE-2026-1234: Kritische Lücke in Moodle",
                snippet="Eine schwerwiegende Sicherheitslücke wurde in Moodle 4.x gefunden...",
                importance_level="KRITISCH",
                why_relevant="Sofortiges Update erforderlich. Exploit bereits aktiv.",
                source_name="BSI CERT-Bund"
            ),
            PreviewItem(
                title="Phishing-Welle: Gefälschte Schulportal-Mails",
                snippet="Aktuell werden vermehrt Phishing-Mails an Lehrkräfte versendet...",
                importance_level="DRINGEND",
                why_relevant="Warnung an Kollegium empfohlen. Erkennungsmerkmale beachten.",
                source_name="BSI"
            ),
        ],
        "fortbildungen": [
            PreviewItem(
                title="Kostenlose Fortbildung: KI im Unterricht",
                snippet="Das Landesinstitut bietet eine Online-Fortbildung zu KI-Tools...",
                importance_level="PRUEFEN",
                why_relevant="Passt zu Ihrem Interessenprofil. Online-Format, 4 Stunden.",
                source_name="Landesinstitut"
            ),
        ],
        "wettbewerbe-projekte": [
            PreviewItem(
                title="Jugend forscht: Anmeldung bis 30. November",
                snippet="Der größte deutsche MINT-Wettbewerb sucht wieder junge Forscher...",
                importance_level="WICHTIG",
                why_relevant="Frist in 60 Tagen. Für Schüler ab Klasse 4.",
                source_name="Jugend forscht e.V."
            ),
        ],
    }

    # Use the samples for this template's slug, if there are any
    slug = template.slug
    if slug in examples:
        return examples[slug]

    # Generic fallback sample
    return [
        PreviewItem(
            title=f"Beispiel-Meldung für {template.name}",
            snippet=f"Dies ist eine Vorschau, wie Alerts für das Thema '{template.name}' aussehen würden.",
            importance_level="INFO",
            why_relevant="Passend zu Ihren ausgewählten Themen.",
            source_name="Beispielquelle"
        )
    ]

View File

@@ -0,0 +1,405 @@
"""
Topic API Routes für Alerts Agent.
CRUD-Operationen für Alert-Topics (Feed-Quellen).
"""
from typing import List, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from pydantic import BaseModel, Field, HttpUrl
from sqlalchemy.orm import Session as DBSession
from alerts_agent.db import get_db
from alerts_agent.db.repository import TopicRepository, AlertItemRepository
from alerts_agent.db.models import FeedTypeEnum
router = APIRouter(prefix="/topics", tags=["alerts"])
# =============================================================================
# PYDANTIC MODELS
# =============================================================================
class TopicCreate(BaseModel):
    """Request model for creating a topic."""
    name: str = Field(..., min_length=1, max_length=255)
    description: str = Field(default="", max_length=2000)
    feed_url: Optional[str] = Field(default=None, max_length=2000)
    feed_type: str = Field(default="rss")  # rss, email, webhook
    # Fetch interval in minutes; validated to 5..1440 (1440 min = 24 h).
    fetch_interval_minutes: int = Field(default=60, ge=5, le=1440)
    is_active: bool = Field(default=True)
class TopicUpdate(BaseModel):
    """Request model for updating a topic.

    All fields are optional; the update endpoint ignores fields that are
    ``None``.
    """
    name: Optional[str] = Field(default=None, min_length=1, max_length=255)
    description: Optional[str] = Field(default=None, max_length=2000)
    feed_url: Optional[str] = Field(default=None, max_length=2000)
    feed_type: Optional[str] = None
    fetch_interval_minutes: Optional[int] = Field(default=None, ge=5, le=1440)
    is_active: Optional[bool] = None
class TopicResponse(BaseModel):
    """Response model for a topic."""
    id: str
    name: str
    description: str
    feed_url: Optional[str]
    feed_type: str
    is_active: bool
    fetch_interval_minutes: int
    # Outcome of the most recent fetch attempt, if any.
    last_fetched_at: Optional[datetime]
    last_fetch_error: Optional[str]
    # Counters copied from the topic row.
    total_items_fetched: int
    items_kept: int
    items_dropped: int
    created_at: datetime
    updated_at: datetime

    class Config:
        from_attributes = True
class TopicListResponse(BaseModel):
    """Response model for a list of topics."""
    topics: List[TopicResponse]
    # Number of entries in `topics`.
    total: int
class TopicStatsResponse(BaseModel):
    """Response model for topic statistics."""
    topic_id: str
    name: str
    total_alerts: int
    # Alert counts grouped by status / by decision.
    by_status: dict
    by_decision: dict
    # "KEEP" count divided by total_alerts; None when there are no alerts.
    keep_rate: Optional[float]
class FetchResultResponse(BaseModel):
    """Response model for a manually triggered fetch."""
    success: bool
    topic_id: str
    new_items: int
    duplicates_skipped: int
    # Error text when the fetch failed; None on success.
    error: Optional[str] = None
# =============================================================================
# API ENDPOINTS
# =============================================================================
@router.post("", response_model=TopicResponse, status_code=201)
async def create_topic(
    topic: TopicCreate,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Create a new topic (feed source).

    Topics represent Google Alerts RSS feeds or other feed sources.
    """
    new_topic = TopicRepository(db).create(
        name=topic.name,
        description=topic.description,
        feed_url=topic.feed_url,
        feed_type=topic.feed_type,
        fetch_interval_minutes=topic.fetch_interval_minutes,
        is_active=topic.is_active,
    )

    # Fields copied 1:1 from the row; description and feed_type get defaults.
    copied = (
        "id", "name", "feed_url", "is_active", "fetch_interval_minutes",
        "last_fetched_at", "last_fetch_error", "total_items_fetched",
        "items_kept", "items_dropped", "created_at", "updated_at",
    )
    payload = {field: getattr(new_topic, field) for field in copied}
    payload["description"] = new_topic.description or ""
    payload["feed_type"] = new_topic.feed_type.value if new_topic.feed_type else "rss"
    return TopicResponse(**payload)
@router.get("", response_model=TopicListResponse)
async def list_topics(
    is_active: Optional[bool] = None,
    db: DBSession = Depends(get_db),
) -> TopicListResponse:
    """
    List all topics.

    Can optionally be filtered by active status.
    """
    rows = TopicRepository(db).get_all(is_active=is_active)

    items = []
    for row in rows:
        feed_type = "rss"
        if row.feed_type:
            feed_type = row.feed_type.value
        items.append(
            TopicResponse(
                id=row.id,
                name=row.name,
                description=row.description or "",
                feed_url=row.feed_url,
                feed_type=feed_type,
                is_active=row.is_active,
                fetch_interval_minutes=row.fetch_interval_minutes,
                last_fetched_at=row.last_fetched_at,
                last_fetch_error=row.last_fetch_error,
                total_items_fetched=row.total_items_fetched,
                items_kept=row.items_kept,
                items_dropped=row.items_dropped,
                created_at=row.created_at,
                updated_at=row.updated_at,
            )
        )
    return TopicListResponse(topics=items, total=len(rows))
@router.get("/{topic_id}", response_model=TopicResponse)
async def get_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Fetch a topic by ID.

    Raises:
        HTTPException: 404 if no topic with this ID exists.
    """
    found = TopicRepository(db).get_by_id(topic_id)
    if not found:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # Fields copied 1:1 from the row; description and feed_type get defaults.
    copied = (
        "id", "name", "feed_url", "is_active", "fetch_interval_minutes",
        "last_fetched_at", "last_fetch_error", "total_items_fetched",
        "items_kept", "items_dropped", "created_at", "updated_at",
    )
    payload = {field: getattr(found, field) for field in copied}
    payload["description"] = found.description or ""
    payload["feed_type"] = found.feed_type.value if found.feed_type else "rss"
    return TopicResponse(**payload)
@router.put("/{topic_id}", response_model=TopicResponse)
async def update_topic(
    topic_id: str,
    updates: TopicUpdate,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Update a topic.

    Only fields with a non-None value are applied.

    Raises:
        HTTPException: 400 if the request contains no updates;
            404 if no topic with this ID exists.
    """
    repo = TopicRepository(db)

    changes = updates.model_dump(exclude_none=True)
    if not changes:
        raise HTTPException(status_code=400, detail="Keine Updates angegeben")

    row = repo.update(topic_id, **changes)
    if not row:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    feed_type = "rss"
    if row.feed_type:
        feed_type = row.feed_type.value

    return TopicResponse(
        id=row.id,
        name=row.name,
        description=row.description or "",
        feed_url=row.feed_url,
        feed_type=feed_type,
        is_active=row.is_active,
        fetch_interval_minutes=row.fetch_interval_minutes,
        last_fetched_at=row.last_fetched_at,
        last_fetch_error=row.last_fetch_error,
        total_items_fetched=row.total_items_fetched,
        items_kept=row.items_kept,
        items_dropped=row.items_dropped,
        created_at=row.created_at,
        updated_at=row.updated_at,
    )
@router.delete("/{topic_id}", status_code=204)
async def delete_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
):
    """
    Delete a topic and all of its alerts (CASCADE).

    Raises:
        HTTPException: 404 if no topic with this ID exists.
    """
    deleted = TopicRepository(db).delete(topic_id)
    if deleted:
        return None
    raise HTTPException(status_code=404, detail="Topic nicht gefunden")
@router.get("/{topic_id}/stats", response_model=TopicStatsResponse)
async def get_topic_stats(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicStatsResponse:
    """
    Return statistics for a topic.

    Raises:
        HTTPException: 404 if no topic with this ID exists.
    """
    topics = TopicRepository(db)
    alerts = AlertItemRepository(db)

    topic = topics.get_by_id(topic_id)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    status_counts = alerts.count_by_status(topic_id)
    decision_counts = alerts.count_by_decision(topic_id)

    total_alerts = sum(status_counts.values())
    kept = decision_counts.get("KEEP", 0)

    # No alerts yet -> the rate is undefined rather than 0.
    if total_alerts > 0:
        keep_rate = kept / total_alerts
    else:
        keep_rate = None

    return TopicStatsResponse(
        topic_id=topic_id,
        name=topic.name,
        total_alerts=total_alerts,
        by_status=status_counts,
        by_decision=decision_counts,
        keep_rate=keep_rate,
    )
@router.post("/{topic_id}/fetch", response_model=FetchResultResponse)
async def fetch_topic(
    topic_id: str,
    background_tasks: BackgroundTasks,
    db: DBSession = Depends(get_db),
) -> FetchResultResponse:
    """
    Trigger a manual fetch for a topic.

    The fetch is awaited within this request (``background_tasks`` is
    accepted but currently not used), so the response already contains the
    number of new items and skipped duplicates. A failing fetch does not
    raise: the error text is stored on the topic and returned with
    ``success=False``.

    Raises:
        HTTPException: 404 if the topic does not exist; 400 if it has no
            feed URL configured.
    """
    topic_repo = TopicRepository(db)
    topic = topic_repo.get_by_id(topic_id)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")
    if not topic.feed_url:
        raise HTTPException(
            status_code=400,
            detail="Topic hat keine Feed-URL konfiguriert"
        )

    # Imported here to avoid circular imports
    from alerts_agent.ingestion.rss_fetcher import fetch_and_store_feed

    try:
        result = await fetch_and_store_feed(
            topic_id=topic_id,
            feed_url=topic.feed_url,
            db=db,
        )
    except Exception as e:
        # Deliberate catch-all at the request boundary: any fetch failure is
        # reported in the response instead of surfacing as a 500.
        error_text = str(e)
        # Discard whatever the failed fetch left pending; after a failed
        # flush the session cannot be used again until it is rolled back.
        db.rollback()
        topic_repo.update(topic_id, last_fetch_error=error_text)
        return FetchResultResponse(
            success=False,
            topic_id=topic_id,
            new_items=0,
            duplicates_skipped=0,
            error=error_text,
        )

    return FetchResultResponse(
        success=True,
        topic_id=topic_id,
        new_items=result.get("new_items", 0),
        duplicates_skipped=result.get("duplicates_skipped", 0),
    )
@router.post("/{topic_id}/activate", response_model=TopicResponse)
async def activate_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Activate a topic for automatic fetching.

    Raises:
        HTTPException: 404 if no topic with this ID exists.
    """
    row = TopicRepository(db).update(topic_id, is_active=True)
    if not row:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # Fields copied 1:1 from the row; description and feed_type get defaults.
    copied = (
        "id", "name", "feed_url", "is_active", "fetch_interval_minutes",
        "last_fetched_at", "last_fetch_error", "total_items_fetched",
        "items_kept", "items_dropped", "created_at", "updated_at",
    )
    payload = {field: getattr(row, field) for field in copied}
    payload["description"] = row.description or ""
    payload["feed_type"] = row.feed_type.value if row.feed_type else "rss"
    return TopicResponse(**payload)
@router.post("/{topic_id}/deactivate", response_model=TopicResponse)
async def deactivate_topic(
    topic_id: str,
    db: DBSession = Depends(get_db),
) -> TopicResponse:
    """
    Disable a topic (stops automatic fetching).

    Sets ``is_active`` to False through the topic repository and returns the
    updated topic.

    Raises:
        HTTPException: 404 when the repository update yields no topic.
    """
    topic = TopicRepository(db).update(topic_id, is_active=False)
    if not topic:
        raise HTTPException(status_code=404, detail="Topic nicht gefunden")

    # Fall back to "rss" when the topic has no feed type stored.
    feed_type = topic.feed_type.value if topic.feed_type else "rss"
    payload = {
        "id": topic.id,
        "name": topic.name,
        "description": topic.description or "",
        "feed_url": topic.feed_url,
        "feed_type": feed_type,
        "is_active": topic.is_active,
        "fetch_interval_minutes": topic.fetch_interval_minutes,
        "last_fetched_at": topic.last_fetched_at,
        "last_fetch_error": topic.last_fetch_error,
        "total_items_fetched": topic.total_items_fetched,
        "items_kept": topic.items_kept,
        "items_dropped": topic.items_dropped,
        "created_at": topic.created_at,
        "updated_at": topic.updated_at,
    }
    return TopicResponse(**payload)

View File

@@ -0,0 +1,554 @@
"""
API routes for the Guided Mode wizard.
Manages the 3-step setup wizard:
1. Choose a role (Lehrkraft, Schulleitung, IT-Beauftragte)
2. Select templates (max. 3)
3. Confirmation and activation
Additionally: migration wizard for existing Google Alerts.
Endpoints:
- GET /wizard/state - Fetch the current wizard state
- PUT /wizard/step/{step} - Save a step
- POST /wizard/complete - Complete the wizard
- POST /wizard/reset - Reset the wizard
- POST /wizard/migrate/email - Start the e-mail migration
- POST /wizard/migrate/rss - RSS import
- GET /wizard/migrate/sources - List the user's migration sources
"""
import uuid
from typing import Optional, List, Dict, Any
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session as DBSession
from ..db.database import get_db
from ..db.models import (
UserAlertSubscriptionDB, AlertTemplateDB, AlertSourceDB,
AlertModeEnum, UserRoleEnum, MigrationModeEnum, FeedTypeEnum
)
router = APIRouter(prefix="/wizard", tags=["wizard"])
# ============================================================================
# Request/Response Models
# ============================================================================
class WizardState(BaseModel):
    """Current wizard state as returned by the state endpoint."""
    # Id of the subscription backing the wizard; None when no subscription exists.
    subscription_id: Optional[str] = None
    current_step: int = 0  # 0 = not started, 1-3 = steps, 4 = completed
    is_completed: bool = False
    # Data saved so far, keyed per step (e.g. "step1", "step2", "step3").
    step_data: Dict[str, Any] = {}
    # Template summaries recommended for the user's role.
    recommended_templates: List[Dict[str, Any]] = []
class Step1Data(BaseModel):
    """Payload for step 1: role selection."""
    # Role identifier; validated against UserRoleEnum by the step-1 endpoint.
    role: str = Field(..., description="lehrkraft, schulleitung, it_beauftragte")
class Step2Data(BaseModel):
    """Payload for step 2: template selection."""
    # Between one and three template ids.
    template_ids: List[str] = Field(..., min_length=1, max_length=3)
class Step3Data(BaseModel):
    """Payload for step 3: confirmation and digest settings."""
    notification_email: Optional[str] = None
    digest_enabled: bool = True
    # NOTE(review): free-form string; the set of accepted values is not
    # visible in this module — confirm against the digest implementation.
    digest_frequency: str = "weekly"
class StepResponse(BaseModel):
    """Response returned after a wizard step has been saved."""
    status: str
    current_step: int
    next_step: int
    message: str
    # Template recommendations to show next; defaults to an empty list.
    recommended_templates: List[Dict[str, Any]] = []
class MigrateEmailRequest(BaseModel):
    """Request body for the e-mail migration."""
    # Optional human-readable label for the migrated alert.
    original_label: Optional[str] = Field(default=None, description="Beschreibung des Alerts")
class MigrateEmailResponse(BaseModel):
    """Response for the e-mail migration."""
    status: str
    # Generated address the user should forward their Google Alerts to.
    inbound_address: str
    # Step-by-step instructions shown to the user.
    instructions: List[str]
    # Id of the created alert source.
    source_id: str
class MigrateRssRequest(BaseModel):
    """Request body for the RSS import."""
    # Between 1 and 20 feed URLs.
    rss_urls: List[str] = Field(..., min_length=1, max_length=20)
    # Optional labels, matched to rss_urls by position.
    labels: Optional[List[str]] = None
class MigrateRssResponse(BaseModel):
    """Response for the RSS import."""
    status: str
    # Number of alert sources created by the import.
    sources_created: int
    # Number of topics created by the import.
    topics_created: int
    message: str
# ============================================================================
# Helper Functions
# ============================================================================
def get_user_id_from_request() -> str:
    """Return the id of the calling user.

    NOTE(review): placeholder implementation — no request is inspected and
    the fixed id "demo-user" is returned on every call.
    """
    demo_user_id = "demo-user"
    return demo_user_id
def _get_or_create_subscription(db: DBSession, user_id: str) -> UserAlertSubscriptionDB:
    """Return the user's unfinished wizard subscription, creating it if absent.

    Looks for a subscription of ``user_id`` whose wizard is not completed.
    When none is found, a new guided-mode subscription at wizard step 0 is
    inserted, committed and refreshed before being returned.
    """
    pending = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if pending:
        return pending

    fresh = UserAlertSubscriptionDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        mode=AlertModeEnum.GUIDED,
        wizard_step=0,
        wizard_completed=False,
        wizard_state={},
        is_active=True,
    )
    db.add(fresh)
    db.commit()
    db.refresh(fresh)
    return fresh
def _get_recommended_templates(db: DBSession, role: str) -> List[Dict[str, Any]]:
    """Return summaries of the templates recommended for ``role``.

    Considers only active, non-premium templates, ordered by ``sort_order``,
    and keeps those whose ``target_roles`` contain ``role``. Each summary is
    a plain dict with ``recommended`` set to True.
    """
    candidates = (
        db.query(AlertTemplateDB)
        .filter(
            AlertTemplateDB.is_active == True,
            AlertTemplateDB.is_premium == False
        )
        .order_by(AlertTemplateDB.sort_order)
        .all()
    )
    return [
        {
            "id": tpl.id,
            "slug": tpl.slug,
            "name": tpl.name,
            "description": tpl.description,
            "icon": tpl.icon,
            "category": tpl.category,
            "recommended": True,
        }
        for tpl in candidates
        # A missing target_roles value is treated as "no roles".
        if role in (tpl.target_roles or [])
    ]
def _generate_inbound_address(user_id: str, source_id: str) -> str:
"""Generiere eindeutige Inbound-E-Mail-Adresse."""
short_id = source_id[:8]
return f"alerts+{short_id}@breakpilot.app"
# ============================================================================
# Wizard Endpoints
# ============================================================================
@router.get("/state", response_model=WizardState)
async def get_wizard_state(
    db: DBSession = Depends(get_db)
):
    """
    Fetch the current wizard state.

    Uses the user's most recently created subscription and returns its step,
    saved step data and the templates recommended for the stored role. When
    the user has no subscription, an empty "not started" state is returned.
    """
    user_id = get_user_id_from_request()
    latest = (
        db.query(UserAlertSubscriptionDB)
        .filter(UserAlertSubscriptionDB.user_id == user_id)
        .order_by(UserAlertSubscriptionDB.created_at.desc())
        .first()
    )
    if not latest:
        return WizardState(
            subscription_id=None,
            current_step=0,
            is_completed=False,
            step_data={},
            recommended_templates=[],
        )

    # Recommendations only make sense once a role has been chosen.
    if latest.user_role:
        role = latest.user_role.value
    else:
        role = None
    if role:
        recommended = _get_recommended_templates(db, role)
    else:
        recommended = []

    return WizardState(
        subscription_id=latest.id,
        current_step=latest.wizard_step or 0,
        is_completed=latest.wizard_completed or False,
        step_data=latest.wizard_state or {},
        recommended_templates=recommended,
    )
@router.put("/step/1", response_model=StepResponse)
async def save_step_1(
    data: Step1Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 1: store the user's role.

    Validates the role, stores it on the user's unfinished wizard
    subscription (creating that subscription if necessary) and returns the
    templates recommended for the role.

    Raises:
        HTTPException: 400 when ``data.role`` is not a valid ``UserRoleEnum``
            value.
    """
    user_id = get_user_id_from_request()

    # Validate the role before touching the database.
    try:
        role = UserRoleEnum(data.role)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="Ungültige Rolle. Erlaubt: 'lehrkraft', 'schulleitung', 'it_beauftragte'"
        ) from None

    subscription = _get_or_create_subscription(db, user_id)

    subscription.user_role = role
    subscription.wizard_step = 1
    # Assign a fresh dict instead of mutating the loaded one in place:
    # SQLAlchemy does not detect in-place changes to a plain JSON value, so
    # re-assigning the same (mutated) object may never be written back.
    # (Harmless if the column already uses mutation tracking.)
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step1"] = {"role": data.role}
    subscription.wizard_state = wizard_state
    subscription.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(subscription)

    recommended = _get_recommended_templates(db, data.role)
    return StepResponse(
        status="success",
        current_step=1,
        next_step=2,
        message=f"Rolle '{data.role}' gespeichert. Bitte wählen Sie jetzt Ihre Themen.",
        recommended_templates=recommended,
    )
@router.put("/step/2", response_model=StepResponse)
async def save_step_2(
    data: Step2Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 2: select templates.

    Stores the selected template ids (1-3) on the user's unfinished wizard
    subscription. Repeated ids in the request are collapsed, keeping the
    first occurrence.

    Raises:
        HTTPException: 400 when no unfinished wizard subscription exists or
            when at least one template id does not exist.
    """
    user_id = get_user_id_from_request()
    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if not subscription:
        raise HTTPException(status_code=400, detail="Bitte zuerst Schritt 1 abschließen")

    # De-duplicate while preserving order. The IN query returns each template
    # once, so duplicates would otherwise fail the length check below and be
    # reported as "invalid" although every id exists.
    template_ids = list(dict.fromkeys(data.template_ids))

    templates = db.query(AlertTemplateDB).filter(
        AlertTemplateDB.id.in_(template_ids)
    ).all()
    if len(templates) != len(template_ids):
        raise HTTPException(status_code=400, detail="Eine oder mehrere Template-IDs sind ungültig")

    subscription.selected_template_ids = template_ids
    subscription.wizard_step = 2
    # Assign a fresh dict instead of mutating the loaded one in place:
    # SQLAlchemy does not detect in-place changes to a plain JSON value, so
    # re-assigning the same (mutated) object may never be written back.
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step2"] = {
        "template_ids": template_ids,
        "template_names": [t.name for t in templates],
    }
    subscription.wizard_state = wizard_state
    subscription.updated_at = datetime.utcnow()
    db.commit()

    return StepResponse(
        status="success",
        current_step=2,
        next_step=3,
        message=f"{len(templates)} Themen ausgewählt. Bitte bestätigen Sie Ihre Auswahl.",
        recommended_templates=[],
    )
@router.put("/step/3", response_model=StepResponse)
async def save_step_3(
    data: Step3Data,
    db: DBSession = Depends(get_db)
):
    """
    Step 3: digest settings and confirmation.

    Stores the notification e-mail and digest preferences on the user's
    unfinished wizard subscription.

    Raises:
        HTTPException: 400 when no unfinished wizard subscription exists or
            when no templates have been selected yet.
    """
    user_id = get_user_id_from_request()
    subscription = db.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.user_id == user_id,
        UserAlertSubscriptionDB.wizard_completed == False
    ).first()
    if not subscription:
        raise HTTPException(status_code=400, detail="Bitte zuerst Schritte 1 und 2 abschließen")
    if not subscription.selected_template_ids:
        raise HTTPException(status_code=400, detail="Bitte zuerst Templates auswählen (Schritt 2)")

    subscription.notification_email = data.notification_email
    subscription.digest_enabled = data.digest_enabled
    subscription.digest_frequency = data.digest_frequency
    subscription.wizard_step = 3
    # Assign a fresh dict instead of mutating the loaded one in place:
    # SQLAlchemy does not detect in-place changes to a plain JSON value, so
    # re-assigning the same (mutated) object may never be written back.
    wizard_state = dict(subscription.wizard_state or {})
    wizard_state["step3"] = {
        "notification_email": data.notification_email,
        "digest_enabled": data.digest_enabled,
        "digest_frequency": data.digest_frequency,
    }
    subscription.wizard_state = wizard_state
    subscription.updated_at = datetime.utcnow()
    db.commit()

    return StepResponse(
        status="success",
        current_step=3,
        next_step=4,
        message="Einstellungen gespeichert. Klicken Sie auf 'Jetzt starten' um den Wizard abzuschließen.",
        recommended_templates=[],
    )
@router.post("/complete")
async def complete_wizard(
    db: DBSession = Depends(get_db)
):
    """
    Complete the wizard.

    Marks the user's unfinished wizard subscription as completed (step 4)
    and returns a summary including the selected template ids.

    NOTE(review): the activation helpers are imported below but never
    called, so this function does not itself create topics, rules or
    profiles for the selected templates — confirm whether that happens
    elsewhere or is still to be implemented.

    Raises:
        HTTPException: 400 when no unfinished wizard subscription exists or
            when no templates have been selected.
    """
    user_id = get_user_id_from_request()
    pending = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.user_id == user_id,
            UserAlertSubscriptionDB.wizard_completed == False
        )
        .first()
    )
    if not pending:
        raise HTTPException(status_code=400, detail="Kein aktiver Wizard gefunden")
    if not pending.selected_template_ids:
        raise HTTPException(status_code=400, detail="Bitte zuerst Templates auswählen")

    # Imported here as in the original code; currently unused (see note above).
    from .subscriptions import activate_template, ActivateTemplateRequest

    pending.wizard_completed = True
    pending.wizard_step = 4
    pending.updated_at = datetime.utcnow()
    db.commit()

    summary = {
        "status": "success",
        "message": "Wizard abgeschlossen! Ihre Alerts werden ab jetzt gesammelt.",
        "subscription_id": pending.id,
        "selected_templates": pending.selected_template_ids,
        "next_action": "Besuchen Sie die Inbox, um Ihre ersten Alerts zu sehen.",
    }
    return summary
@router.post("/reset")
async def reset_wizard(
    db: DBSession = Depends(get_db)
):
    """Reset the wizard so the user can start over.

    Deletes the first unfinished wizard subscription found for the user, if
    any, and always reports success.
    """
    user_id = get_user_id_from_request()
    pending = (
        db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.user_id == user_id,
            UserAlertSubscriptionDB.wizard_completed == False
        )
        .first()
    )
    if pending:
        db.delete(pending)
        db.commit()

    outcome = {
        "status": "success",
        "message": "Wizard zurückgesetzt. Sie können neu beginnen.",
    }
    return outcome
# ============================================================================
# Migration Endpoints (für bestehende Google Alerts)
# ============================================================================
@router.post("/migrate/email", response_model=MigrateEmailResponse)
async def start_email_migration(
    request: MigrateEmailRequest = None,
    db: DBSession = Depends(get_db)
):
    """
    Start the e-mail migration for existing Google Alerts.

    Creates an e-mail alert source for the user and generates an inbound
    address to which the user can redirect their Google Alerts. The response
    contains that address together with step-by-step instructions.
    """
    user_id = get_user_id_from_request()

    # Apply the default label both when no body was sent and when the body
    # omits (or leaves empty) original_label; previously the latter stored
    # a source without any label.
    requested_label = request.original_label if request else None
    label = requested_label or "Google Alert Migration"

    source = AlertSourceDB(
        id=str(uuid.uuid4()),
        user_id=user_id,
        source_type=FeedTypeEnum.EMAIL,
        original_label=label,
        migration_mode=MigrationModeEnum.FORWARD,
        is_active=True,
    )
    # The address is derived from the id assigned above.
    source.inbound_address = _generate_inbound_address(user_id, source.id)
    db.add(source)
    db.commit()
    db.refresh(source)

    return MigrateEmailResponse(
        status="success",
        inbound_address=source.inbound_address,
        source_id=source.id,
        instructions=[
            "1. Öffnen Sie Google Alerts (google.com/alerts)",
            "2. Klicken Sie auf das Bearbeiten-Symbol bei Ihrem Alert",
            f"3. Ändern Sie die E-Mail-Adresse zu: {source.inbound_address}",
            "4. Speichern Sie die Änderung",
            "5. Ihre Alerts werden automatisch importiert und gefiltert",
        ],
    )
@router.post("/migrate/rss", response_model=MigrateRssResponse)
async def import_rss_feeds(
    request: MigrateRssRequest,
    db: DBSession = Depends(get_db)
):
    """
    Import existing Google Alert RSS feeds.

    Creates one alert source and one linked topic per RSS URL and commits
    them together. Labels are taken from ``request.labels`` by position; a
    missing or empty label falls back to "RSS Feed <n>".

    Raises:
        HTTPException: 400 when a URL is not an absolute http(s) URL. All
            URLs are checked before anything is written.
    """
    from urllib.parse import urlsplit

    from ..db.models import AlertTopicDB

    user_id = get_user_id_from_request()

    # The stored feed URLs are user-supplied and are meant to be fetched
    # later, so only accept absolute http(s) URLs.
    for url in request.rss_urls:
        try:
            parts = urlsplit(url)
            is_valid = parts.scheme in ("http", "https") and bool(parts.netloc)
        except ValueError:
            is_valid = False
        if not is_valid:
            raise HTTPException(
                status_code=400,
                detail=f"Ungültige RSS-URL: {url[:100]}"
            )

    labels = request.labels or []
    sources_created = 0
    topics_created = 0
    for i, url in enumerate(request.rss_urls):
        # Label from the request, or a generated one.
        label = labels[i] if i < len(labels) else None
        if not label:
            label = f"RSS Feed {i + 1}"

        # Only add the ellipsis when the URL was actually shortened.
        shown_url = url if len(url) <= 50 else f"{url[:50]}..."

        source = AlertSourceDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            source_type=FeedTypeEnum.RSS,
            original_label=label,
            rss_url=url,
            migration_mode=MigrationModeEnum.IMPORT,
            is_active=True,
        )
        db.add(source)
        sources_created += 1

        topic = AlertTopicDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name=label,
            description=f"Importiert aus RSS: {shown_url}",
            feed_url=url,
            feed_type=FeedTypeEnum.RSS,
            is_active=True,
            fetch_interval_minutes=60,
        )
        db.add(topic)
        # Link the source to its topic.
        source.topic_id = topic.id
        topics_created += 1

    db.commit()

    return MigrateRssResponse(
        status="success",
        sources_created=sources_created,
        topics_created=topics_created,
        message=f"{sources_created} RSS-Feeds importiert. Die Alerts werden automatisch abgerufen.",
    )
@router.get("/migrate/sources")
async def list_migration_sources(
    db: DBSession = Depends(get_db)
):
    """List all migration sources of the user, newest first.

    Returns a dict with the serialized sources under ``"sources"`` and their
    count under ``"total"``.
    """
    user_id = get_user_id_from_request()
    rows = (
        db.query(AlertSourceDB)
        .filter(AlertSourceDB.user_id == user_id)
        .order_by(AlertSourceDB.created_at.desc())
        .all()
    )

    entries = []
    for src in rows:
        entries.append({
            "id": src.id,
            # Enum-valued fields fall back to "unknown" when unset.
            "type": src.source_type.value if src.source_type else "unknown",
            "label": src.original_label,
            "inbound_address": src.inbound_address,
            "rss_url": src.rss_url,
            "migration_mode": src.migration_mode.value if src.migration_mode else "unknown",
            "items_received": src.items_received,
            "is_active": src.is_active,
            "created_at": src.created_at.isoformat() if src.created_at else None,
        })

    return {
        "sources": entries,
        "total": len(rows),
    }

View File

@@ -0,0 +1,8 @@
"""
Alert Template Seed Data.
Enthält vorkonfigurierte Templates (Playbooks) für den Guided Mode.
"""
from .templates import ALERT_TEMPLATES, seed_templates
__all__ = ["ALERT_TEMPLATES", "seed_templates"]

View File

@@ -0,0 +1,492 @@
"""
Vorkonfigurierte Alert-Templates (Playbooks) für den Guided Mode.
Diese Templates ermöglichen Lehrern und Schulleitungen einen schnellen Einstieg
ohne RSS-Feeds oder Keywords manuell konfigurieren zu müssen.
Alle Texte in B1/B2 Deutsch, keine IT-Fachbegriffe.
"""
from typing import List, Dict, Any
from sqlalchemy.orm import Session
import uuid
# Default importance mapping (score → 5 levels)
DEFAULT_IMPORTANCE_CONFIG = {
    "kritisch": 0.90,  # from 90% → critical
    "dringend": 0.75,  # 75-90% → urgent
    "wichtig": 0.60,  # 60-75% → important
    "pruefen": 0.40,  # 40-60% → to be reviewed
    # everything below 40% → info
}
# Importance mapping for time-critical templates (e.g. deadlines)
DEADLINE_IMPORTANCE_CONFIG = {
    "kritisch": 0.85,
    "dringend": 0.70,
    "wichtig": 0.55,
    "pruefen": 0.35,
}
# Importance mapping for IT security (higher thresholds)
SECURITY_IMPORTANCE_CONFIG = {
    "kritisch": 0.95,
    "dringend": 0.85,
    "wichtig": 0.70,
    "pruefen": 0.50,
}
ALERT_TEMPLATES: List[Dict[str, Any]] = [
# =========================================================================
# 1. Förderprogramme & Fristen
# =========================================================================
{
"slug": "foerderprogramme",
"name": "Förderprogramme & Fristen",
"description": "Bleiben Sie informiert über Förderanträge, Deadlines und neue Programme für Schulen. Verpassen Sie keine Fristen mehr.",
"icon": "💰",
"category": "administration",
"target_roles": ["schulleitung"],
"topics_config": [
{
"name": "DigitalPakt & Bundes-Förderprogramme",
"keywords": ["DigitalPakt", "Förderprogramm Schule", "Bundesförderung Bildung", "BMBF Schule"],
},
{
"name": "Landesförderung Bildung",
"keywords": ["Landesförderung Schule", "Kultusministerium Förderung", "Schulträger Fördermittel"],
},
{
"name": "EU & Stiftungen",
"keywords": ["Erasmus+ Schule", "EU-Förderung Bildung", "Stiftung Schule", "ESF Bildung"],
},
],
"rules_config": [
{
"name": "Fristen priorisieren",
"conditions": [{"field": "title", "op": "in", "value": ["Frist", "Deadline", "bis zum", "Antragsfrist", "endet am"]}],
"action_type": "tag",
"action_config": {"tags": ["frist"]},
"priority": 100,
},
{
"name": "Ausschluss Stellenanzeigen",
"conditions": [{"field": "title", "op": "in", "value": ["Stellenanzeige", "Wir suchen", "Job"]}],
"action_type": "drop",
"priority": 90,
},
],
"profile_config": {
"priorities": [
{"label": "Fördermittel", "weight": 0.95, "keywords": ["Fördermittel", "Zuschuss", "Antrag", "Förderung"], "description": "Finanzielle Förderung für Schulen"},
{"label": "Fristen", "weight": 0.90, "keywords": ["Frist", "Deadline", "Stichtag", "Bewerbungsschluss"], "description": "Zeitkritische Informationen"},
{"label": "Digitalisierung", "weight": 0.80, "keywords": ["DigitalPakt", "Tablet", "WLAN", "digitale Ausstattung"], "description": "IT-Ausstattung und Infrastruktur"},
],
"exclusions": ["Stellenanzeige", "Werbung", "Seminar buchen", "Anzeige"],
},
"importance_config": DEADLINE_IMPORTANCE_CONFIG,
"max_cards_per_day": 8,
"sort_order": 1,
},
# =========================================================================
# 2. Abitur & Prüfungs-Updates
# =========================================================================
{
"slug": "abitur-updates",
"name": "Abitur & Prüfungs-Updates",
"description": "Aktuelle Informationen zu Abitur-Regelungen, Prüfungsformaten, EPA und KMK-Beschlüssen. Wichtig für alle Oberstufenlehrkräfte.",
"icon": "📝",
"category": "teaching",
"target_roles": ["lehrkraft", "schulleitung"],
"topics_config": [
{
"name": "Abitur-Vorgaben",
"keywords": ["Abitur Vorgaben", "Prüfungsaufgaben Abitur", "Abiturprüfung Änderung"],
},
{
"name": "KMK & EPA",
"keywords": ["KMK Beschluss", "EPA Abitur", "Bildungsstandards Abitur", "Operatoren Abitur"],
},
{
"name": "Prüfungsformate",
"keywords": ["Prüfungsformat Schule", "Klausur Oberstufe", "mündliche Prüfung Abitur"],
},
],
"rules_config": [
{
"name": "Kernfächer priorisieren",
"conditions": [{"field": "title", "op": "in", "value": ["Deutsch", "Mathematik", "Englisch", "Leistungskurs"]}],
"action_type": "tag",
"action_config": {"tags": ["kernfach"]},
"priority": 80,
},
],
"profile_config": {
"priorities": [
{"label": "Abitur-Änderungen", "weight": 0.95, "keywords": ["Abitur", "Prüfung", "Zentralabitur"], "description": "Änderungen an Prüfungsregelungen"},
{"label": "KMK-Beschlüsse", "weight": 0.85, "keywords": ["KMK", "Kultusministerkonferenz", "Bildungsstandards"], "description": "Bundesweite Regelungen"},
{"label": "Bewertung", "weight": 0.75, "keywords": ["Bewertung", "Notenschlüssel", "Erwartungshorizont"], "description": "Bewertungskriterien"},
],
"exclusions": ["Nachhilfe", "Abiturtraining", "Lernhilfe kaufen"],
},
"importance_config": DEFAULT_IMPORTANCE_CONFIG,
"max_cards_per_day": 6,
"sort_order": 2,
},
# =========================================================================
# 3. Fortbildungen für Lehrkräfte
# =========================================================================
{
"slug": "fortbildungen",
"name": "Fortbildungen für Lehrkräfte",
"description": "Relevante Fortbildungsangebote in Ihrer Region. Filtern Sie nach Fach, Format und Anbieter.",
"icon": "🎓",
"category": "teaching",
"target_roles": ["lehrkraft"],
"topics_config": [
{
"name": "Landesinstitut Fortbildungen",
"keywords": ["Fortbildung Lehrer", "Landesinstitut Lehrerfortbildung", "Pädagogische Fortbildung"],
},
{
"name": "Digitale Kompetenzen",
"keywords": ["Fortbildung digital", "Medienkompetenz Lehrer", "digitale Bildung Fortbildung"],
},
{
"name": "Fachfortbildungen",
"keywords": ["Fachfortbildung", "Unterrichtsentwicklung", "Didaktik Fortbildung"],
},
],
"rules_config": [
{
"name": "Online-Formate taggen",
"conditions": [{"field": "title", "op": "in", "value": ["Online", "Webinar", "digital", "virtuell"]}],
"action_type": "tag",
"action_config": {"tags": ["online"]},
"priority": 70,
},
{
    "name": "Kostenpflichtige ausschließen",
    # NOTE(review): an empty-string entry ("") was removed from the match
    # list. An empty keyword is never meaningful: depending on how the rule
    # engine evaluates "in" it matches either every snippet or none.
    # Confirm whether another term (e.g. "€") was intended.
    "conditions": [{"field": "snippet", "op": "in", "value": ["kostenpflichtig", "Teilnahmegebühr"]}],
    "action_type": "tag",
    "action_config": {"tags": ["kostenpflichtig"]},
    "priority": 60,
},
],
"profile_config": {
"priorities": [
{"label": "Kostenlose Fortbildungen", "weight": 0.90, "keywords": ["kostenlos", "kostenfrei", "Landesinstitut"], "description": "Staatliche Angebote"},
{"label": "Digitale Medien", "weight": 0.80, "keywords": ["digital", "Tablet", "Medienkompetenz"], "description": "Digitale Bildung"},
{"label": "Inklusion", "weight": 0.75, "keywords": ["Inklusion", "Förderbedarf", "Differenzierung"], "description": "Inklusiver Unterricht"},
],
"exclusions": ["Studium", "Bachelor", "Master", "Referendariat"],
},
"importance_config": DEFAULT_IMPORTANCE_CONFIG,
"max_cards_per_day": 10,
"sort_order": 3,
},
# =========================================================================
# 4. Datenschutz & Rechtsupdates
# =========================================================================
{
"slug": "datenschutz-recht",
"name": "Datenschutz & Rechtsupdates",
"description": "DSGVO-relevante Änderungen, Schulrecht und rechtliche Entwicklungen. Wichtig für Datenschutzbeauftragte und Schulleitungen.",
"icon": "⚖️",
"category": "administration",
"target_roles": ["schulleitung", "it_beauftragte"],
"topics_config": [
{
"name": "DSGVO Schule",
"keywords": ["DSGVO Schule", "Datenschutz Schüler", "Einwilligung Eltern", "personenbezogene Daten Schule"],
},
{
"name": "Schulrecht",
"keywords": ["Schulgesetz Änderung", "Schulordnung neu", "Schulrecht Urteil"],
},
{
"name": "Cloud & Software",
"keywords": ["Cloud Schule DSGVO", "Microsoft 365 Schule", "Videokonferenz Datenschutz"],
},
],
"rules_config": [
{
"name": "Urteile priorisieren",
"conditions": [{"field": "title", "op": "in", "value": ["Urteil", "Gericht", "Beschluss", "Aufsichtsbehörde"]}],
"action_type": "tag",
"action_config": {"tags": ["urteil"]},
"priority": 90,
},
{
"name": "Handlungsbedarf markieren",
"conditions": [{"field": "title", "op": "in", "value": ["ab sofort", "verpflichtend", "muss", "Frist"]}],
"action_type": "tag",
"action_config": {"tags": ["handlungsbedarf"]},
"priority": 85,
},
],
"profile_config": {
"priorities": [
{"label": "DSGVO-Compliance", "weight": 0.95, "keywords": ["DSGVO", "Datenschutz", "Aufsichtsbehörde", "Bußgeld"], "description": "Datenschutzrechtliche Vorgaben"},
{"label": "Schulrecht", "weight": 0.90, "keywords": ["Schulgesetz", "Verordnung", "Erlass"], "description": "Rechtliche Änderungen"},
{"label": "Cloud-Dienste", "weight": 0.80, "keywords": ["Cloud", "Microsoft", "Google", "Zoom"], "description": "Software und Dienste"},
],
"exclusions": ["Werbung", "Seminar buchen", "Beratung anfragen"],
},
"importance_config": DEADLINE_IMPORTANCE_CONFIG,
"max_cards_per_day": 8,
"sort_order": 4,
},
# =========================================================================
# 5. IT-Security Warnungen
# =========================================================================
{
"slug": "it-security",
"name": "IT-Security Warnungen",
"description": "Sicherheitswarnungen und Patches für Schul-IT-Systeme. Kritisch für IT-Beauftragte und Administratoren.",
"icon": "🔒",
"category": "it",
"target_roles": ["it_beauftragte"],
"topics_config": [
{
"name": "BSI & CERT Warnungen",
"keywords": ["BSI Warnung", "CERT-Bund", "Sicherheitslücke", "CVE Schule"],
},
{
"name": "Schul-Software Security",
"keywords": ["Moodle Sicherheit", "IServ Update", "WebUntis Sicherheit", "Nextcloud Patch"],
},
{
"name": "Phishing & Malware",
"keywords": ["Phishing Schule", "Ransomware Bildung", "Malware Warnung"],
},
],
"rules_config": [
{
"name": "CVE-Meldungen priorisieren",
"conditions": [{"field": "title", "op": "regex", "value": "CVE-\\d{4}-\\d+"}],
"action_type": "tag",
"action_config": {"tags": ["cve"]},
"priority": 100,
},
{
"name": "Kritische Patches",
"conditions": [{"field": "title", "op": "in", "value": ["kritisch", "Notfall-Patch", "sofort", "0-day"]}],
"action_type": "tag",
"action_config": {"tags": ["kritisch"]},
"priority": 95,
},
],
"profile_config": {
"priorities": [
{"label": "CVE-Warnungen", "weight": 0.98, "keywords": ["CVE", "Sicherheitslücke", "Schwachstelle", "Exploit"], "description": "Bekannte Sicherheitslücken"},
{"label": "Schul-Software", "weight": 0.90, "keywords": ["Moodle", "IServ", "WebUntis", "Nextcloud", "Schulportal"], "description": "Häufig genutzte Schulsoftware"},
{"label": "Patches", "weight": 0.85, "keywords": ["Patch", "Update", "Sicherheitsupdate", "Hotfix"], "description": "Sicherheitsupdates"},
],
"exclusions": ["Werbung", "Schulung kaufen", "Penetrationstest Angebot"],
},
"importance_config": SECURITY_IMPORTANCE_CONFIG,
"max_cards_per_day": 5,
"sort_order": 5,
},
# =========================================================================
# 6. Wettbewerbe & Projekte
# =========================================================================
{
"slug": "wettbewerbe-projekte",
"name": "Wettbewerbe & Projekte",
"description": "MINT-Wettbewerbe, Erasmus-Projekte, Schülerwettbewerbe und Schulpartnerschaften. Entdecken Sie Chancen für Ihre Schüler.",
"icon": "🏆",
"category": "teaching",
"target_roles": ["lehrkraft", "schulleitung"],
"topics_config": [
{
"name": "MINT-Wettbewerbe",
"keywords": ["MINT Wettbewerb Schule", "Jugend forscht", "Mathematik Olympiade", "Informatik Biber"],
},
{
"name": "Erasmus & Austausch",
"keywords": ["Erasmus+ Schule", "Schüleraustausch", "Schulpartnerschaft Europa"],
},
{
"name": "Kreativ & Sozial",
"keywords": ["Schülerwettbewerb Kunst", "Vorlesewettbewerb", "Umweltpreis Schule", "Sozialer Tag"],
},
],
"rules_config": [
{
"name": "Anmeldefristen",
"conditions": [{"field": "title", "op": "in", "value": ["Anmeldung", "Bewerbung", "Frist", "bis zum"]}],
"action_type": "tag",
"action_config": {"tags": ["frist"]},
"priority": 85,
},
],
"profile_config": {
"priorities": [
{"label": "Wettbewerbe", "weight": 0.90, "keywords": ["Wettbewerb", "Preis", "Auszeichnung", "Gewinner"], "description": "Schülerwettbewerbe"},
{"label": "Erasmus+", "weight": 0.85, "keywords": ["Erasmus", "EU-Programm", "Mobilität", "Austausch"], "description": "Europäische Programme"},
{"label": "MINT", "weight": 0.80, "keywords": ["MINT", "Naturwissenschaft", "Technik", "Informatik"], "description": "MINT-Bereich"},
],
"exclusions": ["Stellenanzeige", "Praktikum", "Ausbildung"],
},
"importance_config": DEADLINE_IMPORTANCE_CONFIG,
"max_cards_per_day": 8,
"sort_order": 6,
},
# =========================================================================
# 7. Personalmarkt (Optional/Premium)
# =========================================================================
{
"slug": "personalmarkt",
"name": "Personalmarkt & Stellen",
"description": "Stellenangebote für Lehrkräfte und Vertretungsstellen in Ihrer Region. Ideal für Schulleitungen mit Personalbedarf.",
"icon": "👥",
"category": "administration",
"target_roles": ["schulleitung"],
"is_premium": True,
"topics_config": [
{
"name": "Lehrerstellen",
"keywords": ["Lehrerstelle", "Lehrkraft gesucht", "Einstellung Lehrer"],
},
{
"name": "Vertretungslehrkräfte",
"keywords": ["Vertretungslehrkraft", "befristete Stelle Lehrer", "Krankheitsvertretung Schule"],
},
{
"name": "Schulsozialarbeit",
"keywords": ["Schulsozialarbeiter", "Sozialpädagoge Schule", "Schulpsychologe"],
},
],
"rules_config": [],
"profile_config": {
"priorities": [
{"label": "Festanstellung", "weight": 0.90, "keywords": ["unbefristet", "Festanstellung", "Planstelle"], "description": "Feste Stellen"},
{"label": "Region", "weight": 0.85, "keywords": [], "description": "Stellen in der Region"},
],
"exclusions": ["Nachhilfe", "Privatlehrer", "freiberuflich"],
},
"importance_config": DEFAULT_IMPORTANCE_CONFIG,
"max_cards_per_day": 10,
"sort_order": 7,
},
# =========================================================================
# 8. Krisenkommunikation (Optional/Premium)
# =========================================================================
{
"slug": "krisenkommunikation",
"name": "Krisenkommunikation",
"description": "Wichtige Meldungen für Schulen in Krisensituationen: Unwetter, Streiks, Verkehrsstörungen, Gesundheitswarnungen.",
"icon": "⚠️",
"category": "administration",
"target_roles": ["schulleitung"],
"is_premium": True,
"topics_config": [
{
"name": "Wetter & Naturereignisse",
"keywords": ["Unwetterwarnung Schule", "Schulausfall Wetter", "Hitzefrei"],
},
{
"name": "Verkehr & ÖPNV",
"keywords": ["Streik ÖPNV", "Verkehrsstörung", "Busausfall Schule"],
},
{
"name": "Gesundheit",
"keywords": ["Gesundheitswarnung Schule", "Infektionsgefahr", "Hygienemaßnahme"],
},
],
"rules_config": [
{
"name": "Sofortmeldungen",
"conditions": [{"field": "title", "op": "in", "value": ["Warnung", "Achtung", "Sofort", "Gefahr", "Ausfall"]}],
"action_type": "tag",
"action_config": {"tags": ["sofort"]},
"priority": 100,
},
],
"profile_config": {
"priorities": [
{"label": "Schulausfall", "weight": 0.98, "keywords": ["Schulausfall", "unterrichtsfrei", "Schule geschlossen"], "description": "Schulschließungen"},
{"label": "Warnungen", "weight": 0.95, "keywords": ["Warnung", "Gefahr", "Achtung"], "description": "Wichtige Warnungen"},
],
"exclusions": ["Werbung", "Versicherung"],
},
"importance_config": SECURITY_IMPORTANCE_CONFIG, # Schnelle Eskalation
"max_cards_per_day": 5,
"sort_order": 8,
},
]
def seed_templates(db: Session, force_update: bool = False) -> int:
    """
    Insert the predefined templates into the database.

    Templates are matched by slug. Missing templates are created; existing
    ones are left untouched unless ``force_update`` is set, in which case
    every key of the template definition that exists as an attribute on the
    row is overwritten. All changes are committed once at the end.

    Args:
        db: SQLAlchemy session.
        force_update: When True, existing templates are updated.

    Returns:
        Number of templates inserted or updated.
    """
    from alerts_agent.db.models import AlertTemplateDB

    touched = 0
    for spec in ALERT_TEMPLATES:
        current = db.query(AlertTemplateDB).filter_by(slug=spec["slug"]).first()

        if not current:
            # Unknown slug: create the template with a new id.
            db.add(AlertTemplateDB(id=str(uuid.uuid4()), **spec))
            touched += 1
            continue

        if not force_update:
            continue

        # Overwrite only attributes the model actually has.
        for attr, value in spec.items():
            if hasattr(current, attr):
                setattr(current, attr, value)
        touched += 1

    db.commit()
    return touched
def get_templates_for_role(role: str) -> List[Dict[str, Any]]:
    """
    Return the recommended templates for a role.

    Premium templates are excluded. The result keeps the order in which the
    templates appear in ``ALERT_TEMPLATES``.

    Args:
        role: "lehrkraft", "schulleitung", or "it_beauftragte"

    Returns:
        List of matching template definitions.
    """
    matches = []
    for template in ALERT_TEMPLATES:
        if role not in template.get("target_roles", []):
            continue
        if template.get("is_premium", False):
            continue
        matches.append(template)
    return matches
def get_template_by_slug(slug: str) -> Dict[str, Any] | None:
    """
    Look up a template definition by its slug.

    Returns the first entry of ALERT_TEMPLATES whose ``"slug"`` equals
    ``slug``, or None when no template matches.
    """
    return next(
        (candidate for candidate in ALERT_TEMPLATES if candidate["slug"] == slug),
        None,
    )

View File

@@ -0,0 +1,34 @@
"""
Database Module für Alerts Agent.
Stellt PostgreSQL-Anbindung für Alert-Persistenz bereit.
Nutzt die gleiche Base wie classroom_engine für konsistente Migrationen.
"""
from .database import Base, SessionLocal, get_db, engine
from .models import (
AlertTopicDB,
AlertItemDB,
AlertRuleDB,
AlertProfileDB,
AlertSourceEnum,
AlertStatusEnum,
RelevanceDecisionEnum,
FeedTypeEnum,
RuleActionEnum,
)
# Public API of the db package: the session/engine helpers, the four core
# models and their enums, exactly as imported at the top of this module.
# NOTE(review): models.py also defines AlertTemplateDB, AlertSourceDB,
# UserAlertSubscriptionDB and AlertDigestDB, which are not re-exported here —
# confirm whether that is intentional.
__all__ = [
    "Base",
    "SessionLocal",
    "get_db",
    "engine",
    "AlertTopicDB",
    "AlertItemDB",
    "AlertRuleDB",
    "AlertProfileDB",
    "AlertSourceEnum",
    "AlertStatusEnum",
    "RelevanceDecisionEnum",
    "FeedTypeEnum",
    "RuleActionEnum",
]

View File

@@ -0,0 +1,19 @@
"""
Database Configuration für Alerts Agent.
Nutzt die gleiche PostgreSQL-Instanz und Base wie Classroom Engine.
"""
# Re-export from classroom_engine for consistency
from classroom_engine.database import (
Base,
SessionLocal,
get_db,
engine,
DATABASE_URL,
)
def init_db():
    """Create all tables known to the shared ``Base`` (intended for development)."""
    # The import is needed for its side effect: it registers the alert models.
    from . import models  # noqa: F401

    metadata = Base.metadata
    metadata.create_all(bind=engine)

View File

@@ -0,0 +1,636 @@
"""
SQLAlchemy Database Models für Alerts Agent.
Persistiert Topics, Alerts, Rules und Profile in PostgreSQL.
Nutzt die gleiche Base wie classroom_engine für konsistente Migrationen.
"""
from datetime import datetime
from sqlalchemy import (
Column, String, Integer, Float, DateTime, JSON,
Boolean, Text, Enum as SQLEnum, ForeignKey, Index
)
from sqlalchemy.orm import relationship
import enum
import uuid
# Import Base from classroom_engine for shared metadata
from classroom_engine.database import Base
class AlertSourceEnum(str, enum.Enum):
    """Origin of an alert (how it entered the system)."""
    GOOGLE_ALERTS_RSS = "google_alerts_rss"
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"
    RSS_FEED = "rss_feed"
    WEBHOOK = "webhook"
    MANUAL = "manual"
class AlertStatusEnum(str, enum.Enum):
    """Processing status of an alert."""
    NEW = "new"
    PROCESSED = "processed"
    DUPLICATE = "duplicate"
    SCORED = "scored"
    REVIEWED = "reviewed"
    ARCHIVED = "archived"
class RelevanceDecisionEnum(str, enum.Enum):
    """Relevance decision for an alert (note: values are upper-case)."""
    KEEP = "KEEP"
    DROP = "DROP"
    REVIEW = "REVIEW"
class FeedTypeEnum(str, enum.Enum):
    """Type of a feed source."""
    RSS = "rss"
    EMAIL = "email"
    WEBHOOK = "webhook"
class RuleActionEnum(str, enum.Enum):
    """Actions a rule can trigger."""
    KEEP = "keep"
    DROP = "drop"
    TAG = "tag"
    EMAIL = "email"
    WEBHOOK = "webhook"
    SLACK = "slack"
class ImportanceLevelEnum(str, enum.Enum):
    """Five-level importance scale used by Guided Mode."""
    # The score ranges below are documentation only; this enum does not
    # perform the score-to-level mapping itself.
    INFO = "info"  # 0.0-0.4 - informational
    PRUEFEN = "pruefen"  # 0.4-0.6 - should be checked
    WICHTIG = "wichtig"  # 0.6-0.75 - important
    DRINGEND = "dringend"  # 0.75-0.9 - urgent
    KRITISCH = "kritisch"  # 0.9-1.0 - critical
class AlertModeEnum(str, enum.Enum):
    """Mode in which a user consumes alerts."""
    GUIDED = "guided"  # guided mode for teachers / school management
    EXPERT = "expert"  # expert mode for IT-savvy users
class MigrationModeEnum(str, enum.Enum):
    """How existing alerts were migrated into the system."""
    FORWARD = "forward"  # e-mail forwarding
    IMPORT = "import"  # RSS import
    RECONSTRUCTED = "reconstructed"  # automatically reconstructed
class DigestStatusEnum(str, enum.Enum):
    """Status of digest generation."""
    PENDING = "pending"
    GENERATING = "generating"
    SENT = "sent"
    FAILED = "failed"
class UserRoleEnum(str, enum.Enum):
    """Role of the user, used for template recommendations."""
    LEHRKRAFT = "lehrkraft"
    SCHULLEITUNG = "schulleitung"
    IT_BEAUFTRAGTE = "it_beauftragte"
class AlertTopicDB(Base):
    """
    Alert topic / feed source.

    Represents one Google Alert configuration or one RSS feed. Alerts and
    rules attached to a topic are deleted together with it (ORM cascade
    ``all, delete-orphan``).
    """
    __tablename__ = 'alert_topics'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=True, index=True)  # optional: multi-user

    # Topic details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Feed configuration
    feed_url = Column(String(2000), nullable=True)
    feed_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # Scheduling
    is_active = Column(Boolean, default=True, index=True)
    fetch_interval_minutes = Column(Integer, default=60)
    last_fetched_at = Column(DateTime, nullable=True)
    last_fetch_error = Column(Text, nullable=True)

    # Statistics
    total_items_fetched = Column(Integer, default=0)
    items_kept = Column(Integer, default=0)
    items_dropped = Column(Integer, default=0)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    alerts = relationship("AlertItemDB", back_populates="topic", cascade="all, delete-orphan")
    rules = relationship("AlertRuleDB", back_populates="topic", cascade="all, delete-orphan")

    def __repr__(self):
        # Column defaults are applied at INSERT time, so feed_type may still be
        # None on an instance that has not been flushed; repr must not raise.
        feed_type = getattr(self.feed_type, "value", self.feed_type)
        return f"<AlertTopic {self.name} ({feed_type})>"
class AlertItemDB(Base):
    """
    Single alert entry.

    Corresponds to one article/link delivered by Google Alerts or an RSS feed.
    """
    __tablename__ = 'alert_items'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=False, index=True)

    # Content
    title = Column(Text, nullable=False)
    url = Column(String(2000), nullable=False)
    snippet = Column(Text, default="")
    article_text = Column(Text, nullable=True)  # full text (optional)

    # Metadata
    lang = Column(String(10), default="de")
    published_at = Column(DateTime, nullable=True, index=True)
    fetched_at = Column(DateTime, default=datetime.utcnow, index=True)
    processed_at = Column(DateTime, nullable=True)

    # Source
    source = Column(
        SQLEnum(AlertSourceEnum),
        default=AlertSourceEnum.GOOGLE_ALERTS_RSS,
        nullable=False
    )

    # Deduplication
    # NOTE: url_hash is unique across the whole table, not per topic.
    url_hash = Column(String(64), unique=True, nullable=False, index=True)
    content_hash = Column(String(64), nullable=True)  # SimHash for fuzzy matching
    canonical_url = Column(String(2000), nullable=True)

    # Status
    status = Column(
        SQLEnum(AlertStatusEnum),
        default=AlertStatusEnum.NEW,
        nullable=False,
        index=True
    )
    cluster_id = Column(String(36), nullable=True)  # groups similar alerts

    # Relevance scoring
    relevance_score = Column(Float, nullable=True)
    relevance_decision = Column(
        SQLEnum(RelevanceDecisionEnum),
        nullable=True,
        index=True
    )
    relevance_reasons = Column(JSON, default=list)  # ["matches_priority", ...]
    relevance_summary = Column(Text, nullable=True)
    scored_by_model = Column(String(100), nullable=True)  # "llama3.1:8b"
    scored_at = Column(DateTime, nullable=True)

    # User actions
    user_marked_relevant = Column(Boolean, nullable=True)  # explicit feedback
    user_tags = Column(JSON, default=list)  # ["wichtig", "später lesen"]
    user_notes = Column(Text, nullable=True)

    # Guided Mode fields (new)
    importance_level = Column(
        SQLEnum(ImportanceLevelEnum),
        nullable=True,
        index=True
    )
    why_relevant = Column(Text, nullable=True)  # "why is this relevant?" explanation
    next_steps = Column(JSON, default=list)  # ["Schulleitung informieren", "Frist beachten"]
    action_deadline = Column(DateTime, nullable=True)  # set if there is a deadline
    source_name = Column(String(255), nullable=True)  # "Kultusministerium NRW"
    source_credibility = Column(String(50), default="official")  # official, news, blog

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="alerts")

    # Composite indexes for frequent queries
    __table_args__ = (
        Index('ix_alert_items_topic_status', 'topic_id', 'status'),
        Index('ix_alert_items_topic_decision', 'topic_id', 'relevance_decision'),
    )

    def __repr__(self):
        # id and status get their defaults only at INSERT time and title may be
        # unset on a half-built instance; repr must stay usable in that state.
        short_id = (self.id or "")[:8]
        short_title = (self.title or "")[:50]
        status = getattr(self.status, "value", self.status)
        return f"<AlertItem {short_id}: {short_title}... ({status})>"
class AlertRuleDB(Base):
    """
    Filter rule for alerts.

    Defines conditions and the action to run for automatic processing.
    """
    __tablename__ = 'alert_rules'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='CASCADE'), nullable=True, index=True)
    user_id = Column(String(36), nullable=True, index=True)

    # Rule details
    name = Column(String(255), nullable=False)
    description = Column(Text, default="")

    # Conditions (stored as JSON)
    # Format: [{"field": "title", "op": "contains", "value": "..."}]
    conditions = Column(JSON, nullable=False, default=list)

    # Action
    action_type = Column(
        SQLEnum(RuleActionEnum),
        default=RuleActionEnum.KEEP,
        nullable=False
    )
    action_config = Column(JSON, default=dict)  # {"email": "x@y.z", "tags": [...]}

    # Prioritisation (higher = executed first)
    priority = Column(Integer, default=0, index=True)
    is_active = Column(Boolean, default=True, index=True)

    # Statistics
    match_count = Column(Integer, default=0)
    last_matched_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationship
    topic = relationship("AlertTopicDB", back_populates="rules")

    def __repr__(self):
        # action_type receives its default only at INSERT time; tolerate None
        # so that repr works on instances that were never flushed.
        action = getattr(self.action_type, "value", self.action_type)
        return f"<AlertRule {self.name} ({action})>"
class AlertProfileDB(Base):
    """
    User profile for relevance scoring.

    Stores priorities, exclusions and learning examples, and can render
    them as a text block for the LLM prompt.
    """
    __tablename__ = 'alert_profiles'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # NOTE(review): unique=True allows at most one profile per user, although
    # the name column is described as distinguishing several — confirm intent.
    user_id = Column(String(36), unique=True, nullable=True, index=True)

    # Display name (in case a user has several profiles)
    name = Column(String(255), default="Default")

    # Relevance criteria
    # Format: [{"label": "Inklusion", "weight": 0.9, "keywords": [...], "description": "..."}]
    priorities = Column(JSON, default=list)

    # Exclusion keywords
    exclusions = Column(JSON, default=list)  # ["Stellenanzeige", "Werbung"]

    # Few-shot examples for the LLM
    # Format: [{"title": "...", "url": "...", "reason": "...", "added_at": "..."}]
    positive_examples = Column(JSON, default=list)
    negative_examples = Column(JSON, default=list)

    # Policies
    # Format: {"prefer_german_sources": true, "max_age_days": 30}
    policies = Column(JSON, default=dict)

    # Statistics
    total_scored = Column(Integer, default=0)
    total_kept = Column(Integer, default=0)
    total_dropped = Column(Integer, default=0)
    accuracy_estimate = Column(Float, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self):
        return "<AlertProfile {} (user={})>".format(self.name, self.user_id)

    @staticmethod
    def _example_lines(heading, examples):
        """Render a heading plus the last five examples, ending with a blank line."""
        block = [heading]
        for example in examples[-5:]:
            block.append(f"- \"{example.get('title', '')}\"")
            reason = example.get("reason")
            if reason:
                block.append(f" Grund: {reason}")
        block.append("")
        return block

    def get_prompt_context(self) -> str:
        """
        Build the profile context for the LLM prompt.

        The returned text is meant to be inserted into the system prompt of
        the relevance scorer. Sections are emitted only when the matching
        profile field is non-empty; lines are joined with newlines.
        """
        out = ["## Relevanzprofil des Nutzers\n"]

        # Priorities: weight is mapped to one of three textual ratings.
        if self.priorities:
            out.append("### Prioritäten (Themen von Interesse):")
            for prio in self.priorities:
                weight = prio.get("weight", 0.5)
                if weight > 0.7:
                    rating = "Sehr wichtig"
                elif weight > 0.4:
                    rating = "Wichtig"
                else:
                    rating = "Interessant"
                out.append(f"- **{prio.get('label', 'Unbenannt')}** ({rating})")
                description = prio.get("description")
                if description:
                    out.append(f" {description}")
                keywords = prio.get("keywords")
                if keywords:
                    out.append(f" Keywords: {', '.join(keywords)}")
            out.append("")

        # Exclusions
        if self.exclusions:
            out.append("### Ausschlüsse (ignorieren):")
            out.append(f"Themen mit diesen Keywords: {', '.join(self.exclusions)}")
            out.append("")

        # Positive examples (last 5)
        if self.positive_examples:
            out.extend(self._example_lines(
                "### Beispiele für relevante Alerts:", self.positive_examples
            ))

        # Negative examples (last 5)
        if self.negative_examples:
            out.extend(self._example_lines(
                "### Beispiele für irrelevante Alerts:", self.negative_examples
            ))

        # Policies
        if self.policies:
            out.append("### Zusätzliche Regeln:")
            out.extend(f"- {key}: {value}" for key, value in self.policies.items())

        return "\n".join(out)

    @classmethod
    def create_default_education_profile(cls) -> "AlertProfileDB":
        """
        Build (without persisting) a default profile for education topics.
        """
        default_priorities = [
            {
                "label": "Inklusion",
                "weight": 0.9,
                "keywords": ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
                "description": "Inklusive Bildung, Förderschulen, Nachteilsausgleich"
            },
            {
                "label": "Datenschutz Schule",
                "weight": 0.85,
                "keywords": ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
                "description": "DSGVO in Schulen, Datenschutz bei Klassenfotos"
            },
            {
                "label": "Schulrecht Bayern",
                "weight": 0.8,
                "keywords": ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
                "description": "Bayerisches Schulrecht, Verordnungen"
            },
            {
                "label": "Digitalisierung Schule",
                "weight": 0.7,
                "keywords": ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
                "description": "Digitale Medien im Unterricht"
            },
        ]
        default_exclusions = ["Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung"]
        default_policies = {
            "prefer_german_sources": True,
            "max_age_days": 30,
            "min_content_length": 100,
        }
        return cls(
            name="Bildung Default",
            priorities=default_priorities,
            exclusions=default_exclusions,
            policies=default_policies,
        )
# ============================================================================
# DUAL-MODE SYSTEM: Templates, Subscriptions, Sources, Digests
# ============================================================================
class AlertTemplateDB(Base):
    """
    Pre-configured alert templates (playbooks).

    For Guided Mode: teachers pick 1-3 templates instead of configuring
    RSS feeds themselves.
    """
    __tablename__ = 'alert_templates'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))

    # Template identity
    slug = Column(String(100), unique=True, nullable=False)  # "foerderprogramme", "abitur-updates"
    name = Column(String(255), nullable=False)  # "Förderprogramme & Fristen"
    description = Column(Text, default="")  # B1/B2-level German, 1-2 sentences
    icon = Column(String(50), default="")  # Emoji: "💰", "📝", "⚖️"
    category = Column(String(100), default="")  # "administration", "teaching", "it"

    # Target audience (which roles benefit)
    target_roles = Column(JSON, default=list)  # ["schulleitung", "lehrkraft"]

    # Template configuration
    topics_config = Column(JSON, default=list)  # pre-configured RSS feeds
    rules_config = Column(JSON, default=list)  # pre-configured rules
    profile_config = Column(JSON, default=dict)  # priorities / exclusions

    # Importance mapping (score -> 5 levels)
    importance_config = Column(JSON, default=dict)  # {"critical": 0.90, "urgent": 0.75, ...}

    # Output settings
    max_cards_per_day = Column(Integer, default=10)
    digest_enabled = Column(Boolean, default=True)
    digest_day = Column(String(20), default="monday")  # day of the weekly digest

    # Localisation
    language = Column(String(10), default="de")

    # Metadata
    is_active = Column(Boolean, default=True)
    is_premium = Column(Boolean, default=False)  # for paid templates
    sort_order = Column(Integer, default=0)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    subscriptions = relationship("UserAlertSubscriptionDB", back_populates="template")

    def __repr__(self):
        return f"<AlertTemplate {self.slug}: {self.name}>"
class AlertSourceDB(Base):
    """
    Alert source used when migrating existing alerts.

    Supports e-mail forwarding, RSS import and reconstruction.
    """
    __tablename__ = 'alert_sources'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    tenant_id = Column(String(36), nullable=True, index=True)  # for multi-tenant setups
    user_id = Column(String(36), nullable=True, index=True)

    # Source type
    source_type = Column(
        SQLEnum(FeedTypeEnum),
        default=FeedTypeEnum.RSS,
        nullable=False
    )

    # Original label (as given by the customer)
    original_label = Column(String(255), nullable=True)  # "EU IT Ausschreibungen"

    # E-mail forwarding
    inbound_address = Column(String(255), nullable=True, unique=True)  # alerts+tenant123@breakpilot.app

    # RSS import
    rss_url = Column(String(2000), nullable=True)

    # Migration mode
    migration_mode = Column(
        SQLEnum(MigrationModeEnum),
        default=MigrationModeEnum.IMPORT,
        nullable=False
    )

    # Link to the topic created from this source
    topic_id = Column(String(36), ForeignKey('alert_topics.id', ondelete='SET NULL'), nullable=True)

    # Status
    is_active = Column(Boolean, default=True)
    items_received = Column(Integer, default=0)
    last_item_at = Column(DateTime, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def __repr__(self):
        # source_type receives its default only at INSERT time; tolerate None
        # so that repr works on instances that were never flushed.
        source_type = getattr(self.source_type, "value", self.source_type)
        return f"<AlertSource {source_type}: {self.original_label}>"
class UserAlertSubscriptionDB(Base):
    """
    User subscription to alert templates or an expert profile.

    Stores the chosen mode, the template/profile links and the state of the
    setup wizard.
    """
    __tablename__ = 'user_alert_subscriptions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), nullable=False, index=True)
    school_id = Column(String(36), nullable=True, index=True)  # optional: school context

    # Mode selection
    mode = Column(
        SQLEnum(AlertModeEnum),
        default=AlertModeEnum.GUIDED,
        nullable=False
    )

    # User role (for Guided Mode)
    user_role = Column(
        SQLEnum(UserRoleEnum),
        nullable=True
    )

    # Template link (Guided Mode) - there can be several
    template_id = Column(String(36), ForeignKey('alert_templates.id', ondelete='SET NULL'), nullable=True)
    selected_template_ids = Column(JSON, default=list)  # up to 3 templates

    # Profile link (Expert Mode)
    profile_id = Column(String(36), ForeignKey('alert_profiles.id', ondelete='SET NULL'), nullable=True)

    # Subscription settings
    is_active = Column(Boolean, default=True)
    notification_email = Column(String(255), nullable=True)

    # Digest preferences
    digest_enabled = Column(Boolean, default=True)
    digest_frequency = Column(String(20), default="weekly")  # weekly, daily
    digest_day = Column(String(20), default="monday")
    last_digest_sent_at = Column(DateTime, nullable=True)

    # Wizard state (for incomplete setups)
    wizard_step = Column(Integer, default=0)
    wizard_completed = Column(Boolean, default=False)
    wizard_state = Column(JSON, default=dict)  # scratch storage for wizard data

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    template = relationship("AlertTemplateDB", back_populates="subscriptions")
    profile = relationship("AlertProfileDB")
    digests = relationship("AlertDigestDB", back_populates="subscription", cascade="all, delete-orphan")

    def __repr__(self):
        # mode receives its default only at INSERT time; tolerate None so that
        # repr works on instances that were never flushed.
        mode = getattr(self.mode, "value", self.mode)
        return f"<UserAlertSubscription {self.user_id} ({mode})>"
class AlertDigestDB(Base):
    """
    Weekly digest summary.

    Holds the rendered summary plus per-importance-level statistics.
    """
    __tablename__ = 'alert_digests'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    subscription_id = Column(String(36), ForeignKey('user_alert_subscriptions.id', ondelete='CASCADE'), nullable=False, index=True)
    user_id = Column(String(36), nullable=False, index=True)

    # Covered period
    period_start = Column(DateTime, nullable=False)
    period_end = Column(DateTime, nullable=False)

    # Content
    title = Column(String(255), default="")  # "KW 3/2026 - Ihre Bildungs-Alerts"
    summary_html = Column(Text, default="")  # rendered HTML summary
    summary_pdf_url = Column(String(500), nullable=True)  # link to the PDF export

    # Statistics
    total_alerts = Column(Integer, default=0)
    kritisch_count = Column(Integer, default=0)
    dringend_count = Column(Integer, default=0)
    wichtig_count = Column(Integer, default=0)
    pruefen_count = Column(Integer, default=0)
    info_count = Column(Integer, default=0)

    # IDs of the alerts contained in this digest
    alert_ids = Column(JSON, default=list)

    # Status
    status = Column(
        SQLEnum(DigestStatusEnum),
        default=DigestStatusEnum.PENDING,
        nullable=False
    )
    sent_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)

    # Relationships
    subscription = relationship("UserAlertSubscriptionDB", back_populates="digests")

    def __repr__(self):
        # status receives its default only at INSERT time; tolerate None so
        # that repr works on instances that were never flushed.
        status = getattr(self.status, "value", self.status)
        return f"<AlertDigest {self.title} ({status})>"

View File

@@ -0,0 +1,992 @@
"""
Repository für Alerts Agent - CRUD Operationen für Topics, Items, Rules und Profile.
Abstraktion der Datenbank-Operationen.
"""
import hashlib
from datetime import datetime
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session as DBSession
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy import or_, and_, func
from .models import (
AlertTopicDB, AlertItemDB, AlertRuleDB, AlertProfileDB,
AlertSourceEnum, AlertStatusEnum, RelevanceDecisionEnum,
FeedTypeEnum, RuleActionEnum
)
# =============================================================================
# TOPIC REPOSITORY
# =============================================================================
class TopicRepository:
    """Repository for alert topics (feed sources)."""

    def __init__(self, db: DBSession):
        self.db = db

    def _persist(self, topic):
        """Commit the session and reload ``topic`` from the database."""
        self.db.commit()
        self.db.refresh(topic)
        return topic

    @staticmethod
    def _iso(moment):
        """Return ``moment.isoformat()`` or None for a falsy value."""
        return moment.isoformat() if moment else None

    # ==================== CREATE ====================

    def create(
        self,
        name: str,
        feed_url: str = None,
        feed_type: str = "rss",
        user_id: str = None,
        description: str = "",
        fetch_interval_minutes: int = 60,
        is_active: bool = True,
    ) -> AlertTopicDB:
        """Create, commit and return a new topic.

        ``feed_type`` is converted with ``FeedTypeEnum(...)``, so an unknown
        value raises ``ValueError``.
        """
        import uuid

        new_topic = AlertTopicDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name=name,
            description=description,
            feed_url=feed_url,
            feed_type=FeedTypeEnum(feed_type),
            fetch_interval_minutes=fetch_interval_minutes,
            is_active=is_active,
        )
        self.db.add(new_topic)
        return self._persist(new_topic)

    # ==================== READ ====================

    def get_by_id(self, topic_id: str) -> Optional[AlertTopicDB]:
        """Return the topic with the given id, or None."""
        matching = self.db.query(AlertTopicDB).filter(AlertTopicDB.id == topic_id)
        return matching.first()

    def get_all(
        self,
        user_id: str = None,
        is_active: bool = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[AlertTopicDB]:
        """Return topics, newest first, with optional user/active filters."""
        criteria = []
        if user_id:
            criteria.append(AlertTopicDB.user_id == user_id)
        if is_active is not None:
            criteria.append(AlertTopicDB.is_active == is_active)

        newest_first = AlertTopicDB.created_at.desc()
        return (
            self.db.query(AlertTopicDB)
            .filter(*criteria)
            .order_by(newest_first)
            .offset(offset)
            .limit(limit)
            .all()
        )

    def get_active_for_fetch(self) -> List[AlertTopicDB]:
        """Return all active topics that have a feed URL.

        Note: ``fetch_interval_minutes`` / ``last_fetched_at`` are not
        evaluated here; every active topic with a feed URL is returned.
        """
        fetchable = self.db.query(AlertTopicDB).filter(
            AlertTopicDB.is_active == True,  # noqa: E712 - SQL expression
            AlertTopicDB.feed_url.isnot(None),
        )
        return fetchable.all()

    # ==================== UPDATE ====================

    def update(
        self,
        topic_id: str,
        name: str = None,
        description: str = None,
        feed_url: str = None,
        feed_type: str = None,
        is_active: bool = None,
        fetch_interval_minutes: int = None,
    ) -> Optional[AlertTopicDB]:
        """Update the given fields of a topic; None means "leave unchanged".

        Returns the refreshed topic, or None if the id is unknown.
        """
        topic = self.get_by_id(topic_id)
        if not topic:
            return None

        requested = (
            ("name", name),
            ("description", description),
            ("feed_url", feed_url),
            ("feed_type", feed_type),
            ("is_active", is_active),
            ("fetch_interval_minutes", fetch_interval_minutes),
        )
        for attr, value in requested:
            if value is None:
                continue
            if attr == "feed_type":
                # Stored as an enum; the caller passes the plain string value.
                value = FeedTypeEnum(value)
            setattr(topic, attr, value)

        return self._persist(topic)

    def update_fetch_status(
        self,
        topic_id: str,
        last_fetch_error: str = None,
        items_fetched: int = 0,
    ) -> Optional[AlertTopicDB]:
        """Record a fetch: timestamp, error (or None) and fetched-item count."""
        topic = self.get_by_id(topic_id)
        if not topic:
            return None

        topic.last_fetched_at = datetime.utcnow()
        topic.last_fetch_error = last_fetch_error
        topic.total_items_fetched = topic.total_items_fetched + items_fetched
        return self._persist(topic)

    def increment_stats(
        self,
        topic_id: str,
        kept: int = 0,
        dropped: int = 0,
    ) -> Optional[AlertTopicDB]:
        """Add ``kept`` / ``dropped`` to the topic's counters."""
        topic = self.get_by_id(topic_id)
        if not topic:
            return None

        topic.items_kept = topic.items_kept + kept
        topic.items_dropped = topic.items_dropped + dropped
        return self._persist(topic)

    # ==================== DELETE ====================

    def delete(self, topic_id: str) -> bool:
        """Delete a topic (and its items via cascade); False if not found."""
        topic = self.get_by_id(topic_id)
        if topic:
            self.db.delete(topic)
            self.db.commit()
            return True
        return False

    # ==================== CONVERSION ====================

    def to_dict(self, topic: AlertTopicDB) -> Dict[str, Any]:
        """Convert a topic row into a plain, JSON-friendly dictionary."""
        stats = {
            "total_items_fetched": topic.total_items_fetched,
            "items_kept": topic.items_kept,
            "items_dropped": topic.items_dropped,
        }
        return {
            "id": topic.id,
            "user_id": topic.user_id,
            "name": topic.name,
            "description": topic.description,
            "feed_url": topic.feed_url,
            "feed_type": topic.feed_type.value,
            "is_active": topic.is_active,
            "fetch_interval_minutes": topic.fetch_interval_minutes,
            "last_fetched_at": self._iso(topic.last_fetched_at),
            "last_fetch_error": topic.last_fetch_error,
            "stats": stats,
            "created_at": self._iso(topic.created_at),
            "updated_at": self._iso(topic.updated_at),
        }
# =============================================================================
# ALERT ITEM REPOSITORY
# =============================================================================
class AlertItemRepository:
"""Repository für Alert Items (einzelne Alerts/Artikel)."""
def __init__(self, db: DBSession):
    # Keep the SQLAlchemy session; every repository method works through it.
    self.db = db
# ==================== CREATE ====================
def create(
    self,
    topic_id: str,
    title: str,
    url: str,
    snippet: str = "",
    source: str = "google_alerts_rss",
    published_at: datetime = None,
    lang: str = "de",
) -> AlertItemDB:
    """Create, commit and return a new alert.

    The URL hash and the canonical URL are derived from ``url``; ``source``
    is converted with ``AlertSourceEnum(...)``. No duplicate check is done
    here.
    """
    import uuid

    # Hash of the normalised URL, used for deduplication.
    hashed_url = self._compute_url_hash(url)

    fields = {
        "id": str(uuid.uuid4()),
        "topic_id": topic_id,
        "title": title,
        "url": url,
        "snippet": snippet,
        "source": AlertSourceEnum(source),
        "published_at": published_at,
        "lang": lang,
        "url_hash": hashed_url,
        "canonical_url": self._normalize_url(url),
    }
    record = AlertItemDB(**fields)

    session = self.db
    session.add(record)
    session.commit()
    session.refresh(record)
    return record
def create_if_not_exists(
    self,
    topic_id: str,
    title: str,
    url: str,
    snippet: str = "",
    source: str = "google_alerts_rss",
    published_at: datetime = None,
) -> Optional[AlertItemDB]:
    """Create an alert unless one with the same URL hash already exists.

    Returns the new alert, or None when the URL is a duplicate.
    """
    wanted_hash = self._compute_url_hash(url)
    duplicate = (
        self.db.query(AlertItemDB)
        .filter(AlertItemDB.url_hash == wanted_hash)
        .first()
    )
    if not duplicate:
        return self.create(
            topic_id=topic_id,
            title=title,
            url=url,
            snippet=snippet,
            source=source,
            published_at=published_at,
        )
    return None  # duplicate
# ==================== READ ====================
def get_by_id(self, alert_id: str) -> Optional[AlertItemDB]:
    """Return the alert with the given id, or None."""
    matching = self.db.query(AlertItemDB).filter(AlertItemDB.id == alert_id)
    return matching.first()
def get_by_url_hash(self, url_hash: str) -> Optional[AlertItemDB]:
    """Return the alert with the given URL hash, or None."""
    matching = self.db.query(AlertItemDB).filter(AlertItemDB.url_hash == url_hash)
    return matching.first()
def get_inbox(
    self,
    user_id: str = None,
    topic_id: str = None,
    decision: str = None,
    status: str = None,
    limit: int = 50,
    offset: int = 0,
) -> List[AlertItemDB]:
    """
    Return inbox items, best-scored first, with optional filters.

    Args:
        user_id: When given, only alerts whose topic belongs to this user
            are returned. (Previously this argument was accepted but
            silently ignored, so the inbox was never scoped to a user.)
        topic_id: Restrict to a single topic.
        decision: Relevance decision value ("KEEP", "DROP", "REVIEW").
            Without it, KEEP, REVIEW and not-yet-decided alerts are shown.
        status: Restrict to a processing status value.
        limit: Maximum number of rows.
        offset: Number of rows to skip.

    Raises:
        ValueError: If ``decision`` or ``status`` is not a valid enum value.
    """
    query = self.db.query(AlertItemDB)

    if user_id:
        # Alerts have no user column of their own; ownership lives on the topic.
        query = query.join(
            AlertTopicDB, AlertItemDB.topic_id == AlertTopicDB.id
        ).filter(AlertTopicDB.user_id == user_id)

    if topic_id:
        query = query.filter(AlertItemDB.topic_id == topic_id)

    if decision:
        query = query.filter(
            AlertItemDB.relevance_decision == RelevanceDecisionEnum(decision)
        )
    else:
        # Default view: KEEP, REVIEW and alerts without a decision yet.
        query = query.filter(
            or_(
                AlertItemDB.relevance_decision == RelevanceDecisionEnum.KEEP,
                AlertItemDB.relevance_decision == RelevanceDecisionEnum.REVIEW,
                AlertItemDB.relevance_decision.is_(None)
            )
        )

    if status:
        query = query.filter(AlertItemDB.status == AlertStatusEnum(status))

    # Unscored alerts sort after scored ones; ties are broken by recency.
    return query.order_by(
        AlertItemDB.relevance_score.desc().nullslast(),
        AlertItemDB.fetched_at.desc()
    ).offset(offset).limit(limit).all()
def get_unscored(
    self,
    topic_id: str = None,
    limit: int = 100,
) -> List[AlertItemDB]:
    """Return alerts still in status NEW, most recently fetched first."""
    criteria = [AlertItemDB.status == AlertStatusEnum.NEW]
    if topic_id:
        criteria.append(AlertItemDB.topic_id == topic_id)

    newest_first = AlertItemDB.fetched_at.desc()
    return (
        self.db.query(AlertItemDB)
        .filter(*criteria)
        .order_by(newest_first)
        .limit(limit)
        .all()
    )
def get_by_topic(
    self,
    topic_id: str,
    limit: int = 100,
    offset: int = 0,
) -> List[AlertItemDB]:
    """Return a page of a topic's alerts, most recently fetched first."""
    topic_alerts = self.db.query(AlertItemDB).filter(
        AlertItemDB.topic_id == topic_id
    )
    ordered = topic_alerts.order_by(AlertItemDB.fetched_at.desc())
    return ordered.offset(offset).limit(limit).all()
def count_by_status(self, topic_id: str = None) -> Dict[str, int]:
    """Count alerts per status value, optionally for a single topic."""
    grouped = self.db.query(
        AlertItemDB.status,
        func.count(AlertItemDB.id).label('count')
    )
    if topic_id:
        grouped = grouped.filter(AlertItemDB.topic_id == topic_id)

    totals = {}
    for row in grouped.group_by(AlertItemDB.status).all():
        totals[row[0].value] = row[1]
    return totals
def count_by_decision(self, topic_id: str = None) -> Dict[str, int]:
    """Count alerts per relevance decision; undecided ones are "unscored"."""
    grouped = self.db.query(
        AlertItemDB.relevance_decision,
        func.count(AlertItemDB.id).label('count')
    )
    if topic_id:
        grouped = grouped.filter(AlertItemDB.topic_id == topic_id)

    totals = {}
    for row in grouped.group_by(AlertItemDB.relevance_decision).all():
        verdict = row[0]
        label = verdict.value if verdict else "unscored"
        totals[label] = row[1]
    return totals
# ==================== UPDATE ====================
def update_scoring(
    self,
    alert_id: str,
    score: float,
    decision: str,
    reasons: List[str] = None,
    summary: str = None,
    model: str = None,
) -> Optional[AlertItemDB]:
    """Store a scoring result and move the alert to status SCORED.

    Returns the refreshed alert, or None if the id is unknown.
    """
    record = self.get_by_id(alert_id)
    if record is None or not record:
        return None

    # Scoring result
    record.relevance_score = score
    record.relevance_decision = RelevanceDecisionEnum(decision)
    record.relevance_reasons = reasons if reasons else []
    record.relevance_summary = summary
    record.scored_by_model = model
    record.scored_at = datetime.utcnow()

    # Processing state
    record.status = AlertStatusEnum.SCORED
    record.processed_at = datetime.utcnow()

    session = self.db
    session.commit()
    session.refresh(record)
    return record
def update_status(
    self,
    alert_id: str,
    status: str,
) -> Optional[AlertItemDB]:
    """Set the processing status of an alert; None if the id is unknown."""
    record = self.get_by_id(alert_id)
    if record:
        record.status = AlertStatusEnum(status)
        self.db.commit()
        self.db.refresh(record)
        return record
    return None
def mark_reviewed(
    self,
    alert_id: str,
    is_relevant: bool,
    notes: str = None,
    tags: List[str] = None,
) -> Optional[AlertItemDB]:
    """Mark an alert as reviewed and store the user's feedback.

    ``notes`` and ``tags`` are only written when they are non-empty, so
    existing values cannot be cleared through this method.
    """
    record = self.get_by_id(alert_id)
    if not record:
        return None

    record.status = AlertStatusEnum.REVIEWED
    record.user_marked_relevant = is_relevant
    for attr, supplied in (("user_notes", notes), ("user_tags", tags)):
        if supplied:
            setattr(record, attr, supplied)

    self.db.commit()
    self.db.refresh(record)
    return record
def archive(self, alert_id: str) -> Optional[AlertItemDB]:
    """Archive an alert by setting its status to "archived"."""
    archived_status = "archived"
    return self.update_status(alert_id, archived_status)
# ==================== DELETE ====================
def delete(self, alert_id: str) -> bool:
    """Delete an alert; returns False when the id is unknown."""
    record = self.get_by_id(alert_id)
    if record:
        self.db.delete(record)
        self.db.commit()
        return True
    return False
def delete_old(self, days: int = 90, topic_id: str = None) -> int:
    """Delete archived alerts fetched more than ``days`` days ago.

    Returns the number of deleted rows.
    """
    from datetime import timedelta

    threshold = datetime.utcnow() - timedelta(days=days)
    criteria = [
        AlertItemDB.status == AlertStatusEnum.ARCHIVED,
        AlertItemDB.fetched_at < threshold,
    ]
    if topic_id:
        criteria.append(AlertItemDB.topic_id == topic_id)

    removed = self.db.query(AlertItemDB).filter(*criteria).delete()
    self.db.commit()
    return removed
# ==================== FOR RSS FETCHER ====================
def get_existing_urls(self, topic_id: str) -> set:
    """
    Return the set of known URL hashes for one topic.

    Used by the RSS fetcher to avoid creating duplicates.
    """
    rows = (
        self.db.query(AlertItemDB.url_hash)
        .filter(AlertItemDB.topic_id == topic_id)
        .all()
    )
    known = set()
    for row in rows:
        if row[0]:
            known.add(row[0])
    return known
def create_from_alert_item(self, alert_item, topic_id: str) -> AlertItemDB:
    """Persist an ``AlertItem`` produced by the RSS fetcher.

    Args:
        alert_item: Object exposing ``title``, ``url``, ``snippet``,
            ``source`` and ``published_at``.
        topic_id: Topic ID to associate the new record with.

    Returns:
        The value returned by ``self.create``.
    """
    # ``source`` may be an enum member (has ``.value``) or a plain value.
    origin = alert_item.source
    if hasattr(origin, 'value'):
        source_name = origin.value
    else:
        source_name = str(origin)

    fields = {
        "topic_id": topic_id,
        "title": alert_item.title,
        "url": alert_item.url,
        "snippet": alert_item.snippet or "",
        "source": source_name,
        "published_at": alert_item.published_at,
    }
    return self.create(**fields)
# ==================== HELPER ====================
def _compute_url_hash(self, url: str) -> str:
    """Return the first 16 hex characters of the SHA-256 of the normalized URL."""
    digest = hashlib.sha256()
    digest.update(self._normalize_url(url).encode())
    return digest.hexdigest()[:16]
def _normalize_url(self, url: str) -> str:
    """Normalize a URL so that equivalent links de-duplicate to one value.

    The host is lower-cased, trailing slashes are stripped from the path,
    known tracking query parameters are removed (names compared
    case-insensitively) and the fragment is dropped.
    """
    import urllib.parse

    # Query parameters that only carry tracking information.
    ignored = frozenset((
        "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
        "fbclid", "gclid", "ref", "source",
    ))

    parts = urllib.parse.urlparse(url)

    kept = {}
    for key, values in urllib.parse.parse_qs(parts.query).items():
        if key.lower() in ignored:
            continue
        kept[key] = values

    rebuilt = parts._replace(
        netloc=parts.netloc.lower(),
        path=parts.path.rstrip("/"),
        query=urllib.parse.urlencode(kept, doseq=True),
        fragment="",  # fragments never identify a different article
    )
    return urllib.parse.urlunparse(rebuilt)
# ==================== CONVERSION ====================
def to_dict(self, alert: AlertItemDB) -> Dict[str, Any]:
    """Convert an alert DB model into a plain dictionary.

    Datetimes are rendered with ``isoformat()`` (``None`` when unset) and
    enum fields are rendered through their ``.value``.
    """
    def iso(moment):
        return moment.isoformat() if moment else None

    decision = alert.relevance_decision

    relevance = {
        "score": alert.relevance_score,
        "decision": decision.value if decision else None,
        "reasons": alert.relevance_reasons,
        "summary": alert.relevance_summary,
        "model": alert.scored_by_model,
        "scored_at": iso(alert.scored_at),
    }
    feedback = {
        "marked_relevant": alert.user_marked_relevant,
        "tags": alert.user_tags,
        "notes": alert.user_notes,
    }

    return {
        "id": alert.id,
        "topic_id": alert.topic_id,
        "title": alert.title,
        "url": alert.url,
        "snippet": alert.snippet,
        "source": alert.source.value,
        "lang": alert.lang,
        "published_at": iso(alert.published_at),
        "fetched_at": iso(alert.fetched_at),
        "status": alert.status.value,
        "relevance": relevance,
        "user_feedback": feedback,
    }
# =============================================================================
# ALERT RULE REPOSITORY
# =============================================================================
class RuleRepository:
    """Data-access layer for alert rules (filter rules) on a DB session."""

    def __init__(self, db: DBSession):
        self.db = db

    # ==================== CREATE ====================

    def create(
        self,
        name: str,
        conditions: List[Dict],
        action_type: str = "keep",
        action_config: Dict = None,
        topic_id: str = None,
        user_id: str = None,
        description: str = "",
        priority: int = 0,
    ) -> AlertRuleDB:
        """Create, commit and return a new rule with a fresh UUID."""
        import uuid

        new_rule = AlertRuleDB(
            id=str(uuid.uuid4()),
            topic_id=topic_id,
            user_id=user_id,
            name=name,
            description=description,
            conditions=conditions,
            action_type=RuleActionEnum(action_type),
            action_config=action_config or {},
            priority=priority,
        )
        return self._persist(new_rule, add=True)

    # ==================== READ ====================

    def get_by_id(self, rule_id: str) -> Optional[AlertRuleDB]:
        """Return the rule with the given ID, or ``None``."""
        matching = self.db.query(AlertRuleDB).filter(AlertRuleDB.id == rule_id)
        return matching.first()

    def get_active(
        self,
        topic_id: str = None,
        user_id: str = None,
    ) -> List[AlertRuleDB]:
        """Return all active rules, highest priority first.

        When ``topic_id`` / ``user_id`` is given, rules bound to that value
        AND unbound (NULL) rules are both included.
        """
        active = self.db.query(AlertRuleDB).filter(
            AlertRuleDB.is_active == True  # noqa: E712 - SQL expression
        )

        scopes = (
            (topic_id, AlertRuleDB.topic_id),
            (user_id, AlertRuleDB.user_id),
        )
        for wanted, column in scopes:
            if wanted:
                active = active.filter(or_(column == wanted, column.is_(None)))

        return active.order_by(AlertRuleDB.priority.desc()).all()

    def get_all(
        self,
        user_id: str = None,
        topic_id: str = None,
        is_active: bool = None,
    ) -> List[AlertRuleDB]:
        """Return rules matching the optional exact filters, by priority desc."""
        rules = self.db.query(AlertRuleDB)

        if user_id:
            rules = rules.filter(AlertRuleDB.user_id == user_id)
        if topic_id:
            rules = rules.filter(AlertRuleDB.topic_id == topic_id)
        if is_active is not None:
            rules = rules.filter(AlertRuleDB.is_active == is_active)

        ordered = rules.order_by(AlertRuleDB.priority.desc())
        return ordered.all()

    # ==================== UPDATE ====================

    def update(
        self,
        rule_id: str,
        name: str = None,
        description: str = None,
        conditions: List[Dict] = None,
        action_type: str = None,
        action_config: Dict = None,
        priority: int = None,
        is_active: bool = None,
    ) -> Optional[AlertRuleDB]:
        """Update the given fields of a rule; ``None`` means "leave as is".

        Returns:
            The refreshed rule, or ``None`` when the ID is unknown.
        """
        target = self.get_by_id(rule_id)
        if not target:
            return None

        # Applied in this order; action_type is converted to the enum.
        requested = (
            ("name", name),
            ("description", description),
            ("conditions", conditions),
            ("action_type", action_type),
            ("action_config", action_config),
            ("priority", priority),
            ("is_active", is_active),
        )
        for attr, value in requested:
            if value is None:
                continue
            if attr == "action_type":
                value = RuleActionEnum(value)
            setattr(target, attr, value)

        return self._persist(target)

    def increment_match_count(self, rule_id: str) -> Optional[AlertRuleDB]:
        """Increase a rule's match counter by one and stamp ``last_matched_at``."""
        target = self.get_by_id(rule_id)
        if not target:
            return None

        target.match_count = target.match_count + 1
        target.last_matched_at = datetime.utcnow()
        return self._persist(target)

    # ==================== DELETE ====================

    def delete(self, rule_id: str) -> bool:
        """Delete a rule; return ``True`` when it existed."""
        target = self.get_by_id(rule_id)
        if target:
            self.db.delete(target)
            self.db.commit()
            return True
        return False

    # ==================== CONVERSION ====================

    def to_dict(self, rule: AlertRuleDB) -> Dict[str, Any]:
        """Convert a rule DB model into a plain dictionary."""
        def iso(moment):
            return moment.isoformat() if moment else None

        stats = {
            "match_count": rule.match_count,
            "last_matched_at": iso(rule.last_matched_at),
        }
        return {
            "id": rule.id,
            "topic_id": rule.topic_id,
            "user_id": rule.user_id,
            "name": rule.name,
            "description": rule.description,
            "conditions": rule.conditions,
            "action_type": rule.action_type.value,
            "action_config": rule.action_config,
            "priority": rule.priority,
            "is_active": rule.is_active,
            "stats": stats,
            "created_at": iso(rule.created_at),
            "updated_at": iso(rule.updated_at),
        }

    # ==================== INTERNAL ====================

    def _persist(self, rule: AlertRuleDB, add: bool = False) -> AlertRuleDB:
        """Commit the session (optionally adding ``rule`` first) and refresh it."""
        if add:
            self.db.add(rule)
        self.db.commit()
        self.db.refresh(rule)
        return rule
# =============================================================================
# ALERT PROFILE REPOSITORY
# =============================================================================
class ProfileRepository:
    """Data-access layer for alert profiles (user profiles for relevance scoring)."""

    def __init__(self, db: DBSession):
        self.db = db

    # ==================== CREATE / GET-OR-CREATE ====================

    def get_or_create(self, user_id: str = None) -> AlertProfileDB:
        """Return the profile for ``user_id``, creating an empty one if missing."""
        existing = self.get_by_user_id(user_id)
        if existing:
            return existing

        import uuid

        if user_id:
            display_name = f"Profile {user_id[:8]}"
        else:
            display_name = "Default"

        created = AlertProfileDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name=display_name,
        )
        self.db.add(created)
        return self._commit_and_refresh(created)

    def create_default_education_profile(self, user_id: str = None) -> AlertProfileDB:
        """Create a profile pre-filled with default education-related settings."""
        import uuid

        def focus(label, weight, keywords, description):
            return {
                "label": label,
                "weight": weight,
                "keywords": keywords,
                "description": description,
            }

        default_priorities = [
            focus(
                "Inklusion",
                0.9,
                ["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
                "Inklusive Bildung, Förderschulen, Nachteilsausgleich",
            ),
            focus(
                "Datenschutz Schule",
                0.85,
                ["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
                "DSGVO in Schulen, Datenschutz bei Klassenfotos",
            ),
            focus(
                "Schulrecht Bayern",
                0.8,
                ["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
                "Bayerisches Schulrecht, Verordnungen",
            ),
            focus(
                "Digitalisierung Schule",
                0.7,
                ["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
                "Digitale Medien im Unterricht",
            ),
        ]
        default_exclusions = [
            "Stellenanzeige", "Praktikum gesucht", "Werbung", "Pressemitteilung",
        ]
        default_policies = {
            "prefer_german_sources": True,
            "max_age_days": 30,
            "min_content_length": 100,
        }

        created = AlertProfileDB(
            id=str(uuid.uuid4()),
            user_id=user_id,
            name="Bildung Default",
            priorities=default_priorities,
            exclusions=default_exclusions,
            policies=default_policies,
        )
        self.db.add(created)
        return self._commit_and_refresh(created)

    # ==================== READ ====================

    def get_by_id(self, profile_id: str) -> Optional[AlertProfileDB]:
        """Return the profile with the given ID, or ``None``."""
        matching = self.db.query(AlertProfileDB).filter(
            AlertProfileDB.id == profile_id
        )
        return matching.first()

    def get_by_user_id(self, user_id: str) -> Optional[AlertProfileDB]:
        """Return the profile of a user; a falsy ``user_id`` selects a profile
        whose ``user_id`` is NULL (the user-less default profile)."""
        if user_id:
            condition = AlertProfileDB.user_id == user_id
        else:
            condition = AlertProfileDB.user_id.is_(None)
        return self.db.query(AlertProfileDB).filter(condition).first()

    # ==================== UPDATE ====================

    def update_priorities(
        self,
        profile_id: str,
        priorities: List[Dict],
    ) -> Optional[AlertProfileDB]:
        """Replace the priorities of a profile; ``None`` when the ID is unknown."""
        return self._replace_field(profile_id, "priorities", priorities)

    def update_exclusions(
        self,
        profile_id: str,
        exclusions: List[str],
    ) -> Optional[AlertProfileDB]:
        """Replace the exclusions of a profile; ``None`` when the ID is unknown."""
        return self._replace_field(profile_id, "exclusions", exclusions)

    def add_feedback(
        self,
        profile_id: str,
        title: str,
        url: str,
        is_relevant: bool,
        reason: str = "",
    ) -> Optional[AlertProfileDB]:
        """Record a feedback example on the profile.

        The example is appended to the positive or negative list (only the
        newest 20 entries are kept) and the matching counters are increased.
        """
        target = self.get_by_id(profile_id)
        if not target:
            return None

        entry = {
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        }

        if is_relevant:
            list_attr, counter_attr = "positive_examples", "total_kept"
        else:
            list_attr, counter_attr = "negative_examples", "total_dropped"

        history = list(getattr(target, list_attr) or [])
        history.append(entry)
        setattr(target, list_attr, history[-20:])  # Max 20
        setattr(target, counter_attr, getattr(target, counter_attr) + 1)
        flag_modified(target, list_attr)

        target.total_scored = target.total_scored + 1
        return self._commit_and_refresh(target)

    def update_stats(
        self,
        profile_id: str,
        kept: int = 0,
        dropped: int = 0,
    ) -> Optional[AlertProfileDB]:
        """Add ``kept`` / ``dropped`` to the profile counters."""
        target = self.get_by_id(profile_id)
        if not target:
            return None

        target.total_scored = target.total_scored + (kept + dropped)
        target.total_kept = target.total_kept + kept
        target.total_dropped = target.total_dropped + dropped
        return self._commit_and_refresh(target)

    # ==================== DELETE ====================

    def delete(self, profile_id: str) -> bool:
        """Delete a profile; return ``True`` when it existed."""
        target = self.get_by_id(profile_id)
        if target:
            self.db.delete(target)
            self.db.commit()
            return True
        return False

    # ==================== CONVERSION ====================

    def to_dict(self, profile: AlertProfileDB) -> Dict[str, Any]:
        """Convert a profile DB model into a plain dictionary.

        Examples are reported as counts only, not as full lists.
        """
        def iso(moment):
            return moment.isoformat() if moment else None

        example_counts = {
            "positive": len(profile.positive_examples or []),
            "negative": len(profile.negative_examples or []),
        }
        stats = {
            "total_scored": profile.total_scored,
            "total_kept": profile.total_kept,
            "total_dropped": profile.total_dropped,
            "accuracy_estimate": profile.accuracy_estimate,
        }
        return {
            "id": profile.id,
            "user_id": profile.user_id,
            "name": profile.name,
            "priorities": profile.priorities,
            "exclusions": profile.exclusions,
            "policies": profile.policies,
            "examples": example_counts,
            "stats": stats,
            "created_at": iso(profile.created_at),
            "updated_at": iso(profile.updated_at),
        }

    # ==================== INTERNAL ====================

    def _commit_and_refresh(self, profile: AlertProfileDB) -> AlertProfileDB:
        """Commit the session and reload ``profile`` from the database."""
        self.db.commit()
        self.db.refresh(profile)
        return profile

    def _replace_field(self, profile_id: str, attr: str, value) -> Optional[AlertProfileDB]:
        """Overwrite one attribute of a profile and persist the change."""
        target = self.get_by_id(profile_id)
        if not target:
            return None
        setattr(target, attr, value)
        return self._commit_and_refresh(target)

View File

@@ -0,0 +1,8 @@
"""Alert Ingestion Modules."""
from .rss_fetcher import RSSFetcher, FeedConfig
__all__ = [
"RSSFetcher",
"FeedConfig",
]

View File

@@ -0,0 +1,356 @@
"""
Email Parser für Google Alerts.
Parst Google Alert E-Mails und extrahiert Alert-Items.
Google Alert E-Mail Format:
- Subject: Google Alert - <Suchbegriff>
- Body enthält HTML mit Links zu Artikeln
- Jeder Artikel hat: Titel, URL, Snippet, Quelle
"""
import re
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Dict, Any
from html import unescape
from urllib.parse import urlparse, parse_qs, unquote
from email import message_from_bytes, message_from_string
from email.message import EmailMessage
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
@dataclass
class ParsedAlertEmail:
    """Result of parsing one Google Alert e-mail."""
    # Search term taken from the subject ("Google Alert - <term>"); "" if absent.
    search_term: str
    # Extracted items; each dict has the keys title, url, snippet, source.
    items: List[Dict[str, Any]]
    # Timestamp assigned by the parser for when the mail was received.
    received_at: datetime
    # Value of the Message-ID header, if any.
    message_id: Optional[str] = None
def extract_real_url(google_redirect_url: str) -> str:
    """Extract the real target URL from a Google redirect link.

    Google Alert links have the form
    ``https://www.google.com/url?rct=j&sa=t&url=<ENCODED_URL>&...``.

    Args:
        google_redirect_url: A (possible) Google redirect URL.

    Returns:
        The decoded value of the ``url`` query parameter, or the input
        unchanged when it is not a redirect link or has no ``url`` parameter.
    """
    if "google.com/url" not in google_redirect_url:
        return google_redirect_url

    targets = parse_qs(urlparse(google_redirect_url).query).get("url")
    if not targets:
        return google_redirect_url

    # parse_qs has already percent-decoded the value exactly once. Decoding
    # again would corrupt targets that legitimately contain escapes such as
    # "%20" (transmitted as "%2520").
    return targets[0]
def clean_text(text: str) -> str:
    """Decode HTML entities and collapse whitespace runs to single spaces.

    Falsy input (``None`` or ``""``) yields ``""``; the result is stripped.
    """
    if text:
        decoded = unescape(text)
        collapsed = re.sub(r'\s+', ' ', decoded)
        return collapsed.strip()
    return ""
def parse_google_alert_html(html_content: str) -> List[Dict[str, Any]]:
    """Parse the HTML body of a Google Alert e-mail.

    Two layouts are tried: links inside ``<table>`` elements first and, only
    if that yields nothing, links inside ``<div>`` elements that carry a
    ``class`` attribute. Only Google redirect links with a link text of at
    least 5 characters are considered. Each target URL is reported once,
    even when the link sits inside nested tables/divs.

    Args:
        html_content: HTML content of the e-mail.

    Returns:
        List of items with the keys ``title``, ``url``, ``snippet`` and
        ``source``. Parsing errors are logged and the items collected so far
        are returned.
    """
    items = []
    seen_urls = set()

    def candidate(link):
        """Return (real_url, title) for a usable alert link, else None."""
        href = link.get('href', '')
        # Only Google redirect links are real alert links.
        if 'google.com/url' not in href:
            return None
        title = clean_text(link.get_text())
        if not title or len(title) < 5:
            return None
        real_url = extract_real_url(href)
        if real_url in seen_urls:
            return None
        return real_url, title

    try:
        soup = BeautifulSoup(html_content, 'html.parser')

        # Layout 1: table based (older format).
        for table in soup.find_all('table'):
            for link in table.find_all('a', href=True):
                found = candidate(link)
                if found is None:
                    continue
                real_url, title = found

                # Snippet: remaining text of the enclosing cell, minus the title.
                parent = link.find_parent('td') or link.find_parent('div')
                snippet = ""
                if parent:
                    full_text = clean_text(parent.get_text())
                    if title in full_text:
                        snippet = full_text.replace(title, '').strip()
                        snippet = snippet[:300]

                # A link inside nested tables is returned by find_all() once
                # per enclosing table; the seen-set keeps only the first hit.
                seen_urls.add(real_url)
                items.append({
                    "title": title,
                    "url": real_url,
                    "snippet": snippet,
                    "source": urlparse(real_url).netloc,
                })

        # Layout 2: div based (newer format), only when layout 1 found nothing.
        if not items:
            for div in soup.find_all('div', class_=re.compile(r'.*')):
                for link in div.find_all('a', href=True):
                    found = candidate(link)
                    if found is None:
                        continue
                    real_url, title = found

                    seen_urls.add(real_url)
                    items.append({
                        "title": title,
                        "url": real_url,
                        "snippet": "",
                        "source": urlparse(real_url).netloc,
                    })
    except Exception as e:
        # Best effort: a broken mail must not abort the whole import.
        logger.error(f"Error parsing Google Alert HTML: {e}")

    return items
def _decode_html_part(part) -> str:
    """Return the decoded text of a text/html MIME part ("" without payload)."""
    payload = part.get_payload(decode=True)
    if not payload:
        return ""
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # Unknown charset label in the mail: fall back to UTF-8 rather than
        # discarding the whole message.
        return payload.decode('utf-8', errors='replace')


def parse_email_message(
    email_bytes: bytes = None,
    email_string: str = None,
) -> Optional[ParsedAlertEmail]:
    """Parse an e-mail message in Google Alert format.

    Args:
        email_bytes: Raw e-mail as bytes (takes precedence when truthy).
        email_string: E-mail as string.

    Returns:
        A ``ParsedAlertEmail``, or ``None`` when no input was given, the
        subject does not contain "Google Alert", the mail has no HTML body,
        or parsing fails (the error is logged).
    """
    from datetime import timezone
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header
    from email.utils import parsedate_to_datetime

    try:
        if email_bytes:
            msg = message_from_bytes(email_bytes)
        elif email_string:
            msg = message_from_string(email_string)
        else:
            return None

        # Subjects with non-ASCII search terms arrive RFC 2047 encoded
        # ("=?UTF-8?B?...?="); decode before inspecting them.
        raw_subject = msg.get('Subject', '')
        try:
            subject = str(make_header(decode_header(raw_subject)))
        except (HeaderParseError, LookupError, ValueError):
            subject = str(raw_subject)

        if 'Google Alert' not in subject:
            logger.debug(f"Not a Google Alert email: {subject}")
            return None

        # Search term from the subject, format: "Google Alert - <term>".
        search_term = ""
        if ' - ' in subject:
            search_term = subject.split(' - ', 1)[1].strip()

        message_id = msg.get('Message-ID', '')

        # Receive date: use the Date header when it parses, else "now".
        # Stored as naive UTC, matching the datetime.utcnow() fallback.
        received_at = datetime.utcnow()
        date_str = msg.get('Date', '')
        if date_str:
            try:
                header_date = parsedate_to_datetime(str(date_str))
            except (TypeError, ValueError):
                header_date = None
            if header_date is not None:
                if header_date.tzinfo is not None:
                    header_date = header_date.astimezone(timezone.utc).replace(tzinfo=None)
                received_at = header_date

        # Extract the HTML body (first non-empty text/html part).
        html_content = ""
        if msg.is_multipart():
            for part in msg.walk():
                if part.get_content_type() != 'text/html':
                    continue
                html_content = _decode_html_part(part)
                if html_content:
                    break
        elif msg.get_content_type() == 'text/html':
            html_content = _decode_html_part(msg)

        if not html_content:
            logger.warning(f"No HTML content in Google Alert email: {subject}")
            return None

        items = parse_google_alert_html(html_content)

        return ParsedAlertEmail(
            search_term=search_term,
            items=items,
            received_at=received_at,
            message_id=message_id,
        )
    except Exception as e:
        logger.error(f"Error parsing email message: {e}")
        return None
async def process_alert_emails(
    emails: List[bytes],
    topic_id: str,
    db,
) -> Dict[str, int]:
    """Parse Google Alert e-mails and store their items in the database.

    Args:
        emails: Raw e-mails as bytes.
        topic_id: ID of the topic the items belong to.
        db: SQLAlchemy session.

    Returns:
        Dict with the counters ``new_items`` and ``duplicates_skipped``.
        Mails that cannot be parsed are ignored.
    """
    from alerts_agent.db.repository import AlertItemRepository
    from alerts_agent.db.models import AlertSourceEnum

    repo = AlertItemRepository(db)
    counts = {"new_items": 0, "duplicates_skipped": 0}

    for raw_mail in emails:
        parsed = parse_email_message(email_bytes=raw_mail)
        if not parsed:
            continue

        for entry in parsed.items:
            stored = repo.create_if_not_exists(
                topic_id=topic_id,
                title=entry["title"],
                url=entry["url"],
                snippet=entry.get("snippet", ""),
                source=AlertSourceEnum.GOOGLE_ALERTS_EMAIL,
            )
            # A falsy result means the item already existed.
            counts["new_items" if stored else "duplicates_skipped"] += 1

    return counts
# IMAP-Integration für automatisches E-Mail-Fetching
async def fetch_emails_from_imap(
    host: str,
    username: str,
    password: str,
    folder: str = "INBOX",
    search_criteria: str = 'FROM "googlealerts-noreply@google.com" UNSEEN',
    limit: int = 100,
) -> List[bytes]:
    """Fetch e-mails from an IMAP server.

    Args:
        host: IMAP server hostname.
        username: IMAP user name.
        password: IMAP password.
        folder: IMAP folder (default: INBOX).
        search_criteria: IMAP search criteria.
        limit: Maximum number of e-mails; the last ``limit`` hits are fetched.

    Returns:
        List of raw e-mails as bytes. On errors the mails collected so far
        (possibly none) are returned and the error is logged.
    """
    try:
        import aioimaplib
    except ImportError:
        logger.error("aioimaplib not installed. Run: pip install aioimaplib")
        return []

    emails = []
    client = None
    try:
        client = aioimaplib.IMAP4_SSL(host)
        await client.wait_hello_from_server()
        await client.login(username, password)
        await client.select(folder)

        result, data = await client.search(search_criteria)
        if result != 'OK':
            logger.error(f"IMAP search failed: {result}")
            return []

        # A slice of [-0:] would return everything, so handle limit <= 0 here.
        if limit <= 0 or not data or not data[0]:
            return emails
        message_ids = data[0].split()[-limit:]  # last N e-mails

        for msg_id in message_ids:
            # The search result is bytes; pass the id on as text.
            if isinstance(msg_id, (bytes, bytearray)):
                msg_id = msg_id.decode()

            result, data = await client.fetch(msg_id, '(RFC822)')
            if result != 'OK' or not data:
                continue

            # NOTE(review): imaplib-style responses carry the message as
            # (header, body) tuples; aioimaplib appears to return a flat list
            # of lines where the literal follows a line ending in "{<size>}".
            # Both shapes are accepted - confirm against the installed version.
            literal_follows = False
            for item in data:
                if isinstance(item, tuple) and len(item) >= 2:
                    emails.append(item[1])
                    literal_follows = False
                elif isinstance(item, (bytes, bytearray)):
                    if literal_follows:
                        emails.append(bytes(item))
                        literal_follows = False
                    else:
                        literal_follows = re.search(rb'\{\d+\}\s*$', bytes(item)) is not None
    except Exception as e:
        logger.error(f"IMAP fetch error: {e}")
    finally:
        # Always release the connection, also after a failed search or an error.
        if client is not None:
            try:
                await client.logout()
            except Exception as e:
                logger.debug(f"IMAP logout failed: {e}")

    return emails

View File

@@ -0,0 +1,383 @@
"""
RSS Fetcher für Google Alerts.
Liest Google Alerts RSS Feeds und konvertiert sie in AlertItems.
Google Alerts RSS Feed Format:
- Feed URL: https://google.com/alerts/feeds/<user_id>/<alert_id>
- Entries enthalten: title, link, published, content
"""
import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
from html import unescape
import re
import httpx
try:
import feedparser
FEEDPARSER_AVAILABLE = True
except ImportError:
FEEDPARSER_AVAILABLE = False
from ..models.alert_item import AlertItem, AlertSource, AlertStatus
logger = logging.getLogger(__name__)
@dataclass
class FeedConfig:
    """Configuration for a single RSS feed."""
    # Feed URL that is requested.
    url: str
    topic_label: str  # e.g. "Inklusion Bayern"
    # Disabled feeds are skipped by RSSFetcher.fetch_all().
    enabled: bool = True
    fetch_interval_minutes: int = 60
    # Set by RSSFetcher.fetch_feed() after a fetch that returned entries.
    last_fetched: Optional[datetime] = None
    last_entry_id: Optional[str] = None  # For duplicate detection
@dataclass
class FetchResult:
    """Result of fetching one feed."""
    feed_url: str
    success: bool
    items: list = field(default_factory=list)  # List[AlertItem]
    # Error description when the fetch failed, otherwise None.
    error: Optional[str] = None
    # Creation time of this result object (naive UTC via datetime.utcnow).
    fetched_at: datetime = field(default_factory=datetime.utcnow)
    new_items_count: int = 0
    skipped_count: int = 0  # Items that were already known
class RSSFetcher:
    """
    Fetcher for Google Alerts RSS feeds.

    Usage:
        fetcher = RSSFetcher()
        fetcher.add_feed("https://google.com/alerts/feeds/...", "Inklusion")
        results = await fetcher.fetch_all()
    """

    def __init__(self, timeout: int = 30, user_agent: str = "BreakPilot-AlertAgent/0.1"):
        """
        Initialize the fetcher.

        Args:
            timeout: HTTP timeout in seconds
            user_agent: User-Agent header value

        Raises:
            ImportError: when feedparser is not installed
        """
        if not FEEDPARSER_AVAILABLE:
            raise ImportError(
                "feedparser ist nicht installiert. "
                "Installiere mit: pip install feedparser"
            )
        self.feeds: list[FeedConfig] = []
        self.timeout = timeout
        self.user_agent = user_agent
        # Created lazily by _get_client() and reused across fetches.
        self._client: Optional[httpx.AsyncClient] = None

    def add_feed(self, url: str, topic_label: str, **kwargs) -> None:
        """Register a feed; extra kwargs are passed on to FeedConfig."""
        config = FeedConfig(url=url, topic_label=topic_label, **kwargs)
        self.feeds.append(config)
        logger.info(f"Feed hinzugefügt: {topic_label} ({url[:50]}...)")

    def remove_feed(self, url: str) -> bool:
        """Remove all feeds with this URL; return True if any was removed."""
        before = len(self.feeds)
        self.feeds = [f for f in self.feeds if f.url != url]
        return len(self.feeds) < before

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it when missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={"User-Agent": self.user_agent},
                follow_redirects=True,
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client (safe to call when none was created)."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def fetch_feed(self, config: FeedConfig,
                         known_entry_ids: Optional[set] = None) -> FetchResult:
        """
        Fetch a single feed.

        Args:
            config: Feed configuration
            known_entry_ids: Optional set of already known entry IDs; entries
                whose id (or link/title as fallback) is in the set are skipped

        Returns:
            FetchResult with the converted AlertItems. HTTP and other errors
            are not raised but reported via ``success=False`` and ``error``.
        """
        result = FetchResult(feed_url=config.url, success=False)
        known_ids = known_entry_ids or set()

        try:
            client = await self._get_client()
            response = await client.get(config.url)
            response.raise_for_status()

            feed = feedparser.parse(response.text)

            if feed.bozo and feed.bozo_exception:
                # The feed had parsing problems but may still be usable.
                logger.warning(f"Feed {config.topic_label}: Parsing-Warnung: {feed.bozo_exception}")

            if not feed.entries:
                logger.info(f"Feed {config.topic_label}: Keine Einträge")
                result.success = True
                # An empty feed is still a successful fetch, so record its time
                # just like the non-empty path below does.
                config.last_fetched = datetime.utcnow()
                return result

            items = []
            for entry in feed.entries:
                # Entry ID used for the duplicate check.
                entry_id = entry.get("id") or entry.get("link") or entry.get("title")

                if entry_id in known_ids:
                    result.skipped_count += 1
                    continue

                alert = self._entry_to_alert(entry, config)
                if alert:
                    items.append(alert)
                    result.new_items_count += 1

            result.items = items
            result.success = True
            config.last_fetched = datetime.utcnow()

            logger.info(
                f"Feed {config.topic_label}: {result.new_items_count} neue, "
                f"{result.skipped_count} übersprungen"
            )

        except httpx.HTTPStatusError as e:
            result.error = f"HTTP {e.response.status_code}: {e.response.reason_phrase}"
            logger.error(f"Feed {config.topic_label}: {result.error}")
        except httpx.RequestError as e:
            result.error = f"Request failed: {str(e)}"
            logger.error(f"Feed {config.topic_label}: {result.error}")
        except Exception as e:
            result.error = f"Unexpected error: {str(e)}"
            logger.exception(f"Feed {config.topic_label}: Unerwarteter Fehler")

        return result

    def _entry_to_alert(self, entry: dict, config: FeedConfig) -> Optional[AlertItem]:
        """
        Convert a feedparser entry into an AlertItem.

        Fields read from the entry:
        - title: title, may contain HTML entities/tags
        - link: URL of the article (entries without link are dropped)
        - published_parsed: date as struct_time
        - content[0].value or summary: HTML content used as snippet

        Returns:
            The AlertItem, or None when the entry has no link or conversion fails.
        """
        try:
            title = unescape(entry.get("title", ""))
            title = self._clean_html(title)

            url = entry.get("link", "")
            if not url:
                return None

            snippet = ""
            if "content" in entry and entry["content"]:
                content_html = entry["content"][0].get("value", "")
                snippet = self._clean_html(content_html)
            elif "summary" in entry:
                snippet = self._clean_html(entry["summary"])

            published_at = None
            if "published_parsed" in entry and entry["published_parsed"]:
                try:
                    published_at = datetime(*entry["published_parsed"][:6])
                except (TypeError, ValueError):
                    # Unusable date: keep the item, just without a date.
                    pass

            alert = AlertItem(
                source=AlertSource.GOOGLE_ALERTS_RSS,
                topic_label=config.topic_label,
                feed_url=config.url,
                title=title,
                url=url,
                snippet=snippet[:2000],  # Limit snippet length
                published_at=published_at,
                status=AlertStatus.NEW,
            )
            return alert

        except Exception as e:
            logger.warning(f"Entry konnte nicht konvertiert werden: {e}")
            return None

    def _clean_html(self, html: str) -> str:
        """Decode entities, strip HTML tags and normalize whitespace."""
        if not html:
            return ""
        text = unescape(html)
        text = re.sub(r"<[^>]+>", " ", text)
        text = re.sub(r"\s+", " ", text)
        return text.strip()

    async def fetch_all(self, known_entry_ids: Optional[set] = None,
                        parallel: bool = True) -> list[FetchResult]:
        """
        Fetch all enabled feeds.

        Args:
            known_entry_ids: Set of known entry IDs (shared by all feeds)
            parallel: When True, fetch concurrently via asyncio.gather

        Returns:
            One FetchResult per enabled feed, in feed order
        """
        active_feeds = [f for f in self.feeds if f.enabled]
        if not active_feeds:
            logger.warning("Keine aktiven Feeds konfiguriert")
            return []

        logger.info(f"Fetche {len(active_feeds)} Feeds...")

        if parallel:
            tasks = [
                self.fetch_feed(config, known_entry_ids)
                for config in active_feeds
            ]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Turn exceptions that escaped fetch_feed into failed FetchResults.
            processed = []
            for config, result in zip(active_feeds, results):
                if isinstance(result, Exception):
                    processed.append(FetchResult(
                        feed_url=config.url,
                        success=False,
                        error=str(result)
                    ))
                else:
                    processed.append(result)
            return processed
        else:
            results = []
            for config in active_feeds:
                result = await self.fetch_feed(config, known_entry_ids)
                results.append(result)
            return results

    def get_all_items(self, results: list[FetchResult]) -> list[AlertItem]:
        """Collect the AlertItems of all successful FetchResults."""
        items = []
        for result in results:
            if result.success:
                items.extend(result.items)
        return items

    def get_stats(self, results: list[FetchResult]) -> dict:
        """Summarize fetch results (feed counts, item counts, error list)."""
        total_new = sum(r.new_items_count for r in results)
        total_skipped = sum(r.skipped_count for r in results)
        successful = sum(1 for r in results if r.success)
        failed = sum(1 for r in results if not r.success)

        return {
            "feeds_total": len(results),
            "feeds_successful": successful,
            "feeds_failed": failed,
            "items_new": total_new,
            "items_skipped": total_skipped,
            "errors": [r.error for r in results if r.error],
        }
async def fetch_and_store_feed(
    topic_id: str,
    feed_url: str,
    db,
) -> dict:
    """
    Convenience function to fetch a single feed and store results.

    This is the function used by the API to trigger manual fetches.

    Args:
        topic_id: The topic ID to associate with fetched items
        feed_url: The RSS feed URL to fetch
        db: Database session for storing results

    Returns:
        dict with new_items and duplicates_skipped counts

    Raises:
        ImportError: when feedparser is not installed
        Exception: when the feed could not be fetched
    """
    from ..db.repository import AlertItemRepository, TopicRepository

    if not FEEDPARSER_AVAILABLE:
        raise ImportError("feedparser ist nicht installiert")

    alert_repo = AlertItemRepository(db)
    # These are URL *hashes*, not feed entry IDs. Passing them to the fetcher
    # as known_entry_ids can never match, so de-duplication is done below by
    # hashing each fetched URL instead.
    known_hashes = set(alert_repo.get_existing_urls(topic_id))

    fetcher = RSSFetcher()
    fetcher.add_feed(feed_url, topic_label=topic_id)
    try:
        results = await fetcher.fetch_all()
    finally:
        # Release the HTTP client even when fetching raised.
        await fetcher.close()

    if not results:
        return {"new_items": 0, "duplicates_skipped": 0}

    result = results[0]

    if not result.success:
        raise Exception(result.error or "Feed fetch failed")

    # Store only items whose URL hash is not known for this topic yet.
    # NOTE(review): assumes the repository fills url_hash via
    # _compute_url_hash() when creating items - confirm against create().
    new_count = 0
    duplicates = result.skipped_count
    for item in result.items:
        url_hash = alert_repo._compute_url_hash(item.url)
        if url_hash in known_hashes:
            duplicates += 1
            continue
        alert_repo.create_from_alert_item(item, topic_id)
        known_hashes.add(url_hash)  # also catches repeats within this feed
        new_count += 1

    # Update topic stats
    topic_repo = TopicRepository(db)
    topic_repo.update_fetch_status(
        topic_id,
        last_fetch_error=None,
        items_fetched=new_count,
    )

    return {
        "new_items": new_count,
        "duplicates_skipped": duplicates,
    }

View File

@@ -0,0 +1,279 @@
"""
Scheduler für automatisches Feed-Fetching.
Verwendet APScheduler für periodische Jobs basierend auf Topic-Konfiguration.
"""
import logging
from datetime import datetime
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from apscheduler.jobstores.memory import MemoryJobStore
from sqlalchemy.orm import Session
from alerts_agent.db.database import SessionLocal
from alerts_agent.db.repository import TopicRepository
from alerts_agent.ingestion.rss_fetcher import fetch_and_store_feed
logger = logging.getLogger(__name__)
# Globaler Scheduler (Singleton)
_scheduler: Optional[AsyncIOScheduler] = None
def get_scheduler() -> AsyncIOScheduler:
    """Return the module-wide scheduler, creating it on first use."""
    global _scheduler

    if _scheduler is not None:
        return _scheduler

    job_defaults = {
        "coalesce": True,  # merge missed runs into one
        "max_instances": 1,  # only one instance per job
        "misfire_grace_time": 60,  # 60s tolerance for missed runs
    }
    _scheduler = AsyncIOScheduler(
        jobstores={"default": MemoryJobStore()},
        job_defaults=job_defaults,
    )
    return _scheduler
async def fetch_topic_job(topic_id: str, feed_url: str) -> None:
    """
    Job function that fetches a single topic.

    Called by the scheduler. Opens its own DB session, fetches and stores
    the feed and, on failure, records the error text on the topic. Errors
    never propagate out of the job.
    """
    db = SessionLocal()
    try:
        logger.info(f"Scheduler: Fetching topic {topic_id}")
        result = await fetch_and_store_feed(
            topic_id=topic_id,
            feed_url=feed_url,
            db=db,
        )
        logger.info(
            f"Scheduler: Topic {topic_id} - {result['new_items']} new, "
            f"{result['duplicates_skipped']} skipped"
        )
    except Exception as e:
        logger.error(f"Scheduler: Error fetching topic {topic_id}: {e}")
        try:
            # The failure may have left the session in an aborted
            # transaction; roll back before writing the error.
            db.rollback()
            repo = TopicRepository(db)
            repo.update(topic_id, last_fetch_error=str(e))
        except Exception as store_error:
            # Recording the error is best effort and must not crash the job.
            logger.error(
                f"Scheduler: Could not store fetch error for topic {topic_id}: {store_error}"
            )
    finally:
        db.close()
def schedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int = 60,
) -> str:
    """
    Schedule a periodic fetch job for a topic.

    An existing job for the same topic is removed first.

    Args:
        topic_id: ID of the topic
        feed_url: URL of the RSS feed
        interval_minutes: Interval in minutes

    Returns:
        Job ID for later reference
    """
    job_id = f"fetch_topic_{topic_id}"
    scheduler = get_scheduler()

    previous = scheduler.get_job(job_id)
    if previous:
        scheduler.remove_job(job_id)

    job_kwargs = {"topic_id": topic_id, "feed_url": feed_url}
    scheduler.add_job(
        fetch_topic_job,
        trigger=IntervalTrigger(minutes=interval_minutes),
        id=job_id,
        name=f"Fetch Topic {topic_id}",
        kwargs=job_kwargs,
        replace_existing=True,
    )

    logger.info(f"Scheduled topic {topic_id} every {interval_minutes} minutes")
    return job_id
def unschedule_topic(topic_id: str) -> bool:
    """
    Remove the fetch job of a topic.

    Args:
        topic_id: ID of the topic

    Returns:
        True when a job was removed, False when none was found
    """
    job_id = f"fetch_topic_{topic_id}"
    scheduler = get_scheduler()

    if not scheduler.get_job(job_id):
        return False

    scheduler.remove_job(job_id)
    logger.info(f"Unscheduled topic {topic_id}")
    return True
def reschedule_topic(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
) -> str:
    """
    Update the fetch interval (and feed URL) of a topic's job.

    Delegates to ``schedule_topic``, which replaces an existing job.

    Args:
        topic_id: ID of the topic.
        feed_url: URL of the RSS feed (in case it changed).
        interval_minutes: New interval in minutes.

    Returns:
        The job ID.
    """
    job_id = schedule_topic(topic_id, feed_url, interval_minutes)
    return job_id
def sync_scheduler_with_db() -> dict:
    """
    Bring the scheduler in line with the topics stored in the database.

    Every active topic with a feed URL is (re)scheduled; jobs of inactive
    topics and ``fetch_topic_*`` jobs without a matching topic are removed.

    Returns:
        Dict with the keys ``scheduled`` and ``unscheduled`` (counts).
    """
    db = SessionLocal()
    scheduler = get_scheduler()

    try:
        scheduled = 0
        unscheduled = 0
        # Job IDs that are supposed to exist after the sync.
        wanted_job_ids = set()

        for topic in TopicRepository(db).get_all():
            job_id = f"fetch_topic_{topic.id}"

            if not (topic.is_active and topic.feed_url):
                # Topic must not be scheduled: remove a leftover job.
                if scheduler.get_job(job_id):
                    scheduler.remove_job(job_id)
                    unscheduled += 1
                continue

            # Topic must be scheduled.
            wanted_job_ids.add(job_id)
            schedule_topic(
                topic_id=topic.id,
                feed_url=topic.feed_url,
                interval_minutes=topic.fetch_interval_minutes,
            )
            scheduled += 1

        # Remove orphan jobs (topics that were deleted).
        for job in scheduler.get_jobs():
            is_topic_job = job.id.startswith("fetch_topic_")
            if is_topic_job and job.id not in wanted_job_ids:
                scheduler.remove_job(job.id)
                unscheduled += 1
                logger.info(f"Removed orphan job: {job.id}")

        return {"scheduled": scheduled, "unscheduled": unscheduled}
    finally:
        db.close()
def start_scheduler() -> None:
    """
    Start the scheduler and run an initial sync with the database.

    Intended to be called on application start-up. Does nothing when the
    scheduler is already running.
    """
    scheduler = get_scheduler()
    if scheduler.running:
        return

    scheduler.start()
    logger.info("Alert scheduler started")

    # Initial synchronisation with the database.
    sync_result = sync_scheduler_with_db()
    logger.info(
        f"Scheduler synced: {sync_result['scheduled']} topics scheduled, "
        f"{sync_result['unscheduled']} removed"
    )
def stop_scheduler() -> None:
    """
    Stop the scheduler without waiting for running jobs.

    Intended to be called on application shutdown. Does nothing when the
    scheduler is not running.
    """
    scheduler = get_scheduler()
    if not scheduler.running:
        return

    scheduler.shutdown(wait=False)
    logger.info("Alert scheduler stopped")
def get_scheduler_status() -> dict:
    """
    Report the current state of the scheduler.

    Returns:
        Dict with ``running``, ``jobs_count`` and ``jobs`` (one entry per job
        with id, name, next run time as ISO string or None, and trigger).
    """
    scheduler = get_scheduler()

    job_details = [
        {
            "id": job.id,
            "name": job.name,
            "next_run": job.next_run_time.isoformat() if job.next_run_time else None,
            "trigger": str(job.trigger),
        }
        for job in scheduler.get_jobs()
    ]

    return {
        "running": scheduler.running,
        "jobs_count": len(job_details),
        "jobs": job_details,
    }
# Convenience-Funktion für Topic-Aktivierung
async def on_topic_activated(topic_id: str, feed_url: str, interval_minutes: int) -> None:
    """Hook for topic activation: schedules the topic's fetch job."""
    schedule_topic(
        topic_id,
        feed_url,
        interval_minutes,
    )
async def on_topic_deactivated(topic_id: str) -> None:
    """Hook for topic deactivation: removes the topic's fetch job."""
    # The "was a job removed" flag is intentionally ignored here.
    _ = unschedule_topic(topic_id)
async def on_topic_updated(
    topic_id: str,
    feed_url: str,
    interval_minutes: int,
    is_active: bool,
) -> None:
    """Hook for topic updates: reschedules or removes the fetch job.

    The job is rescheduled only when the topic is active and has a feed URL;
    in every other case it is removed.
    """
    should_run = is_active and feed_url
    if not should_run:
        unschedule_topic(topic_id)
        return

    reschedule_topic(topic_id, feed_url, interval_minutes)
async def on_topic_deleted(topic_id: str) -> None:
    """Hook for topic deletion: removes the topic's fetch job."""
    # The "was a job removed" flag is intentionally ignored here.
    _ = unschedule_topic(topic_id)

View File

@@ -0,0 +1,12 @@
"""Alert Agent Models."""
from .alert_item import AlertItem, AlertSource, AlertStatus
from .relevance_profile import RelevanceProfile, PriorityItem
__all__ = [
"AlertItem",
"AlertSource",
"AlertStatus",
"RelevanceProfile",
"PriorityItem",
]

View File

@@ -0,0 +1,174 @@
"""
AlertItem Model.
Repräsentiert einen einzelnen Alert aus Google Alerts (RSS oder Email).
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import hashlib
import uuid
class AlertSource(str, Enum):
    """Origin of an alert.

    Members are also ``str`` instances, so their values can be compared with
    and serialised as plain strings.
    """
    GOOGLE_ALERTS_RSS = "google_alerts_rss"
    GOOGLE_ALERTS_EMAIL = "google_alerts_email"
    MANUAL = "manual"
class AlertStatus(str, Enum):
    """Processing status of an alert.

    Members are also ``str`` instances, so their values can be compared with
    and serialised as plain strings.
    """
    NEW = "new"
    PROCESSED = "processed"
    DUPLICATE = "duplicate"
    SCORED = "scored"
    REVIEWED = "reviewed"
    ARCHIVED = "archived"
@dataclass
class AlertItem:
    """A single alert entry.

    On construction the canonical (normalised) URL and a short URL hash are
    derived from ``url`` unless they were supplied explicitly.
    """

    # Identification
    id: str = field(default_factory=lambda: str(uuid.uuid4()))

    # Source
    source: AlertSource = AlertSource.GOOGLE_ALERTS_RSS
    topic_label: str = ""  # e.g. "Schulrecht Bayern"
    feed_url: Optional[str] = None

    # Content
    title: str = ""
    url: str = ""
    snippet: str = ""
    article_text: Optional[str] = None

    # Metadata
    lang: str = "de"
    published_at: Optional[datetime] = None
    fetched_at: datetime = field(default_factory=datetime.utcnow)

    # Deduplication
    canonical_url: Optional[str] = None
    url_hash: Optional[str] = None
    content_hash: Optional[str] = None  # SimHash for fuzzy matching

    # Processing
    status: AlertStatus = AlertStatus.NEW
    cluster_id: Optional[str] = None

    # Relevance (after scoring)
    relevance_score: Optional[float] = None  # 0.0 - 1.0
    relevance_decision: Optional[str] = None  # KEEP, DROP, REVIEW
    relevance_reasons: list = field(default_factory=list)
    relevance_summary: Optional[str] = None

    def __post_init__(self):
        """Derive ``url_hash`` and ``canonical_url`` from ``url`` if missing."""
        if not self.url_hash and self.url:
            self.url_hash = self._compute_url_hash()
        if not self.canonical_url and self.url:
            self.canonical_url = self._normalize_url(self.url)

    def _compute_url_hash(self) -> str:
        """Return the first 16 hex characters of the SHA-256 of the normalised URL."""
        normalized = self._normalize_url(self.url)
        return hashlib.sha256(normalized.encode()).hexdigest()[:16]

    def _normalize_url(self, url: str) -> str:
        """Normalise a URL for deduplication.

        Removes known tracking query parameters and the fragment, lower-cases
        the host and strips trailing slashes from the path.

        Args:
            url: The URL to normalise.

        Returns:
            The normalised URL.
        """
        import urllib.parse

        parsed = urllib.parse.urlparse(url)

        # NOTE: Google News redirect URLs (news.google.com/.../articles/...)
        # are not resolved to their target here; they are normalised like any
        # other URL.

        # Query parameters that only carry tracking information.
        tracking_params = {
            "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term",
            "fbclid", "gclid", "ref", "source"
        }
        query_params = urllib.parse.parse_qs(parsed.query)
        cleaned_params = {k: v for k, v in query_params.items()
                          if k.lower() not in tracking_params}
        cleaned_query = urllib.parse.urlencode(cleaned_params, doseq=True)

        # Rebuild the URL without the fragment.
        normalized = urllib.parse.urlunparse((
            parsed.scheme,
            parsed.netloc.lower(),
            parsed.path.rstrip("/"),
            parsed.params,
            cleaned_query,
            ""  # no fragment
        ))
        return normalized

    def compute_content_hash(self, text: Optional[str] = None) -> str:
        """
        Compute and store the SimHash of the content for fuzzy matching.

        The first non-empty value of ``text``, ``article_text``, ``snippet``
        and ``title`` is hashed and stored in ``content_hash``.

        Args:
            text: Optional text to hash instead of the item's own content.

        Returns:
            The stored content hash, or "" if none is available.
        """
        from ..processing.dedup import compute_simhash

        content = text or self.article_text or self.snippet or self.title
        if content:
            self.content_hash = compute_simhash(content)
        return self.content_hash or ""

    def to_dict(self) -> dict:
        """Convert to a dictionary (enums as values, timestamps as ISO strings)."""
        return {
            "id": self.id,
            "source": self.source.value,
            "topic_label": self.topic_label,
            "feed_url": self.feed_url,
            "title": self.title,
            "url": self.url,
            "snippet": self.snippet,
            "article_text": self.article_text,
            "lang": self.lang,
            "published_at": self.published_at.isoformat() if self.published_at else None,
            "fetched_at": self.fetched_at.isoformat() if self.fetched_at else None,
            "canonical_url": self.canonical_url,
            "url_hash": self.url_hash,
            "content_hash": self.content_hash,
            "status": self.status.value,
            "cluster_id": self.cluster_id,
            "relevance_score": self.relevance_score,
            "relevance_decision": self.relevance_decision,
            "relevance_reasons": self.relevance_reasons,
            "relevance_summary": self.relevance_summary,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "AlertItem":
        """Create an AlertItem from a dictionary.

        String values of ``source``/``status`` are converted to their enums
        and ISO strings of ``published_at``/``fetched_at`` to ``datetime``.
        The passed dictionary itself is left unchanged.

        Args:
            data: Mapping of field names to values, e.g. from ``to_dict``.

        Returns:
            The constructed AlertItem.
        """
        # Work on a shallow copy so the caller's dict is never modified.
        values = dict(data)

        # Parse enums
        if isinstance(values.get("source"), str):
            values["source"] = AlertSource(values["source"])
        if isinstance(values.get("status"), str):
            values["status"] = AlertStatus(values["status"])

        # Parse timestamps
        for field_name in ("published_at", "fetched_at"):
            if isinstance(values.get(field_name), str):
                values[field_name] = datetime.fromisoformat(values[field_name])

        return cls(**values)

    def __repr__(self) -> str:
        return f"AlertItem(id={self.id[:8]}, title='{self.title[:50]}...', status={self.status.value})"

View File

@@ -0,0 +1,288 @@
"""
RelevanceProfile Model.
Definiert das Relevanzprofil eines Nutzers für die Alerts-Filterung.
Lernt über Zeit durch Feedback.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
import uuid
@dataclass
class PriorityItem:
    """A priority topic inside a relevance profile."""

    label: str  # e.g. "Inklusion", "Datenschutz Schule"
    weight: float = 0.5  # 0.0 - 1.0, higher = more important
    keywords: list = field(default_factory=list)  # additional keywords
    description: Optional[str] = None  # context for the LLM

    def to_dict(self) -> dict:
        """Return the item's fields as a plain dictionary."""
        field_names = ("label", "weight", "keywords", "description")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "PriorityItem":
        """Build a PriorityItem from a dictionary of field values."""
        item = cls(**data)
        return item
@dataclass
class RelevanceProfile:
    """
    User profile for relevance scoring.

    The profile is used to check alerts for relevance. It contains:
    - Priorities: topics that matter (with a weight)
    - Exclusions: keywords for topics that should be ignored
    - Positive examples: titles/URLs that were relevant
    - Negative examples: titles/URLs that were irrelevant
    - Policies: additional rules (e.g. prefer German sources)
    """

    # Identification
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    user_id: Optional[str] = None  # set if the profile is user specific

    # Relevance criteria
    priorities: list = field(default_factory=list)  # PriorityItem objects (or their dicts)
    exclusions: list = field(default_factory=list)  # keywords to exclude

    # Examples for few-shot learning
    positive_examples: list = field(default_factory=list)  # relevant alerts
    negative_examples: list = field(default_factory=list)  # irrelevant alerts

    # Policies
    policies: dict = field(default_factory=dict)

    # Metadata
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    # Statistics
    total_scored: int = 0
    total_kept: int = 0
    total_dropped: int = 0
    accuracy_estimate: Optional[float] = None  # estimated accuracy

    def add_priority(self, label: str, weight: float = 0.5, **kwargs) -> None:
        """Append a priority topic; extra keyword arguments go to PriorityItem."""
        self.priorities.append(PriorityItem(
            label=label,
            weight=weight,
            **kwargs
        ))
        self.updated_at = datetime.utcnow()

    def add_exclusion(self, keyword: str) -> None:
        """Add an exclusion keyword unless it is already present."""
        if keyword not in self.exclusions:
            self.exclusions.append(keyword)
            self.updated_at = datetime.utcnow()

    def add_positive_example(self, title: str, url: str, reason: str = "") -> None:
        """Add a positive example (for few-shot learning); keeps the last 20."""
        self.positive_examples.append({
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        })
        # Keep only the 20 most recent examples.
        self.positive_examples = self.positive_examples[-20:]
        self.updated_at = datetime.utcnow()

    def add_negative_example(self, title: str, url: str, reason: str = "") -> None:
        """Add a negative example; keeps the last 20."""
        self.negative_examples.append({
            "title": title,
            "url": url,
            "reason": reason,
            "added_at": datetime.utcnow().isoformat(),
        })
        # Keep only the 20 most recent examples.
        self.negative_examples = self.negative_examples[-20:]
        self.updated_at = datetime.utcnow()

    def update_from_feedback(self, alert_title: str, alert_url: str,
                             is_relevant: bool, reason: str = "") -> None:
        """
        Update the profile based on user feedback.

        Stores the alert as a positive or negative example and updates the
        kept/dropped/scored counters. ``accuracy_estimate`` is not updated
        here.

        Args:
            alert_title: Title of the alert.
            alert_url: URL of the alert.
            is_relevant: True if the user marked the alert as relevant.
            reason: Optional reason for the decision.
        """
        if is_relevant:
            self.add_positive_example(alert_title, alert_url, reason)
            self.total_kept += 1
        else:
            self.add_negative_example(alert_title, alert_url, reason)
            self.total_dropped += 1

        self.total_scored += 1
        # TODO: derive accuracy_estimate by comparing predictions with the
        # actual feedback once enough alerts have been scored.

    def get_prompt_context(self) -> str:
        """
        Build the profile context for the LLM prompt.

        The text lists priorities, exclusions, the five most recent positive
        and negative examples and the policies; empty parts are omitted.

        Returns:
            The context as a newline-joined string.
        """
        lines = ["## Relevanzprofil des Nutzers\n"]

        # Priorities
        if self.priorities:
            lines.append("### Prioritäten (Themen von Interesse):")
            for p in self.priorities:
                # Priorities may be stored as plain dicts.
                if isinstance(p, dict):
                    p = PriorityItem.from_dict(p)
                weight_label = "Sehr wichtig" if p.weight > 0.7 else "Wichtig" if p.weight > 0.4 else "Interessant"
                lines.append(f"- **{p.label}** ({weight_label})")
                if p.description:
                    lines.append(f" {p.description}")
                if p.keywords:
                    lines.append(f" Keywords: {', '.join(p.keywords)}")
            lines.append("")

        # Exclusions
        if self.exclusions:
            lines.append("### Ausschlüsse (ignorieren):")
            lines.append(f"Themen mit diesen Keywords: {', '.join(self.exclusions)}")
            lines.append("")

        # Positive examples
        if self.positive_examples:
            lines.append("### Beispiele für relevante Alerts:")
            for ex in self.positive_examples[-5:]:  # last 5
                lines.append(f"- \"{ex['title']}\"")
                if ex.get("reason"):
                    lines.append(f" Grund: {ex['reason']}")
            lines.append("")

        # Negative examples
        if self.negative_examples:
            lines.append("### Beispiele für irrelevante Alerts:")
            for ex in self.negative_examples[-5:]:  # last 5
                lines.append(f"- \"{ex['title']}\"")
                if ex.get("reason"):
                    lines.append(f" Grund: {ex['reason']}")
            lines.append("")

        # Policies
        if self.policies:
            lines.append("### Zusätzliche Regeln:")
            for key, value in self.policies.items():
                lines.append(f"- {key}: {value}")

        return "\n".join(lines)

    def to_dict(self) -> dict:
        """Convert to a dictionary (priorities as dicts, timestamps as ISO strings)."""
        return {
            "id": self.id,
            "user_id": self.user_id,
            "priorities": [p.to_dict() if isinstance(p, PriorityItem) else p
                           for p in self.priorities],
            "exclusions": self.exclusions,
            "positive_examples": self.positive_examples,
            "negative_examples": self.negative_examples,
            "policies": self.policies,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "total_scored": self.total_scored,
            "total_kept": self.total_kept,
            "total_dropped": self.total_dropped,
            "accuracy_estimate": self.accuracy_estimate,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "RelevanceProfile":
        """Create a RelevanceProfile from a dictionary.

        ISO strings of ``created_at``/``updated_at`` are parsed to
        ``datetime`` and priority dicts are converted to PriorityItem. The
        passed dictionary itself is left unchanged.

        Args:
            data: Mapping of field names to values, e.g. from ``to_dict``.

        Returns:
            The constructed RelevanceProfile.
        """
        # Work on a shallow copy so the caller's dict is never modified.
        values = dict(data)

        # Parse timestamps
        for field_name in ("created_at", "updated_at"):
            if isinstance(values.get(field_name), str):
                values[field_name] = datetime.fromisoformat(values[field_name])

        # Parse priorities
        if "priorities" in values:
            values["priorities"] = [
                PriorityItem.from_dict(p) if isinstance(p, dict) else p
                for p in values["priorities"]
            ]

        return cls(**values)

    @classmethod
    def create_default_education_profile(cls) -> "RelevanceProfile":
        """
        Create a default profile for education topics.

        The profile is pre-filled with priorities, exclusions and policies
        aimed at teachers and school staff.
        """
        profile = cls()

        # Education-related priorities
        profile.add_priority(
            "Inklusion",
            weight=0.9,
            keywords=["inklusiv", "Förderbedarf", "Behinderung", "Barrierefreiheit"],
            description="Inklusive Bildung, Förderschulen, Nachteilsausgleich"
        )
        profile.add_priority(
            "Datenschutz Schule",
            weight=0.85,
            keywords=["DSGVO", "Schülerfotos", "Einwilligung", "personenbezogene Daten"],
            description="DSGVO in Schulen, Datenschutz bei Klassenfotos"
        )
        profile.add_priority(
            "Schulrecht Bayern",
            weight=0.8,
            keywords=["BayEUG", "Schulordnung", "Kultusministerium", "Bayern"],
            description="Bayerisches Schulrecht, Verordnungen"
        )
        profile.add_priority(
            "Digitalisierung Schule",
            weight=0.7,
            keywords=["DigitalPakt", "Tablet-Klasse", "Lernplattform"],
            description="Digitale Medien im Unterricht"
        )
        profile.add_priority(
            "Elternarbeit",
            weight=0.6,
            keywords=["Elternbeirat", "Elternabend", "Kommunikation"],
            description="Zusammenarbeit mit Eltern"
        )

        # Default exclusions
        profile.exclusions = [
            "Stellenanzeige",
            "Praktikum gesucht",
            "Werbung",
            "Pressemitteilung",  # often generic
        ]

        # Policies
        profile.policies = {
            "prefer_german_sources": True,
            "max_age_days": 30,  # ignore older alerts
            "min_content_length": 100,  # ignore very short snippets
        }
        return profile

    def __repr__(self) -> str:
        return f"RelevanceProfile(id={self.id[:8]}, priorities={len(self.priorities)}, examples={len(self.positive_examples) + len(self.negative_examples)})"

View File

@@ -0,0 +1,12 @@
"""Alert Processing Modules."""
from .dedup import compute_simhash, hamming_distance, find_duplicates
from .relevance_scorer import RelevanceScorer, ScoringResult
__all__ = [
"compute_simhash",
"hamming_distance",
"find_duplicates",
"RelevanceScorer",
"ScoringResult",
]

View File

@@ -0,0 +1,239 @@
"""
Deduplizierung für Alerts.
Nutzt SimHash für Fuzzy-Matching von ähnlichen Texten.
SimHash ist ein Locality-Sensitive Hash, bei dem ähnliche Texte
ähnliche Hashes produzieren.
"""
import hashlib
import re
from typing import Optional
from collections import Counter
# SimHash parameters
SIMHASH_BITS = 64
SHINGLE_SIZE = 3 # number of consecutive words per shingle
# German stop words (frequent words without semantic value). Built once at
# import time instead of on every _tokenize() call.
_STOPWORDS = frozenset({
    "der", "die", "das", "den", "dem", "des", "ein", "eine", "einer", "eines",
    "und", "oder", "aber", "doch", "wenn", "weil", "dass", "als", "auch",
    "ist", "sind", "war", "waren", "wird", "werden", "wurde", "wurden",
    "hat", "haben", "hatte", "hatten", "kann", "können", "konnte", "konnten",
    "für", "von", "mit", "bei", "nach", "aus", "über", "unter", "vor", "hinter",
    "auf", "an", "in", "im", "am", "um", "bis", "durch", "ohne", "gegen",
    "nicht", "noch", "nur", "schon", "sehr", "mehr", "sich", "es", "sie", "er",
    "wir", "ihr", "ich", "du", "man", "so", "wie", "was", "wer", "wo", "wann",
})

# Matches every character that is not a lowercase ASCII letter, a German
# umlaut/sharp s, a digit or whitespace.
_NON_TOKEN_CHARS = re.compile(r"[^a-zäöüß0-9\s]")


def _tokenize(text: str) -> list:
    """
    Split text into normalised words.

    The text is lower-cased, every character outside ``a-z``, ``äöüß``,
    digits and whitespace is replaced by a space, and German stop words as
    well as words of two characters or fewer are dropped.

    Args:
        text: The text to tokenise.

    Returns:
        List of remaining words in their original order.
    """
    normalized = _NON_TOKEN_CHARS.sub(" ", text.lower())
    return [
        word for word in normalized.split()
        if word not in _STOPWORDS and len(word) > 2
    ]
def _create_shingles(words: list, size: int = SHINGLE_SIZE) -> list:
    """
    Build shingles (word n-grams) from a list of words.

    Shingles are overlapping sequences of ``size`` consecutive words, e.g.
    ["a", "b", "c", "d"] with size=2 -> ["a b", "b c", "c d"]. If there are
    fewer words than ``size``, all words form a single shingle; an empty
    word list yields no shingles.
    """
    if len(words) >= size:
        window_count = len(words) - size + 1
        return [
            " ".join(words[start:start + size])
            for start in range(window_count)
        ]

    if words:
        return [" ".join(words)]
    return []
def _hash_shingle(shingle: str) -> int:
    """Hash a shingle to a 64-bit integer.

    Uses the first 8 bytes (64 bits) of the shingle's MD5 digest, read as a
    big-endian integer.
    """
    digest = hashlib.md5(shingle.encode()).digest()
    leading_bytes = digest[:8]
    return int.from_bytes(leading_bytes, byteorder="big")
def compute_simhash(text: str) -> str:
    """
    Compute the SimHash of a text.

    Steps:
    1. Split the text into shingles (word n-grams).
    2. Hash every shingle.
    3. For every bit position add +1 if the hash has the bit set, else -1.
    4. Sum these votes over all hashes.
    5. A positive sum gives bit 1 in the result, otherwise 0.

    Returns:
        16-character hex string (64 bit); all zeros when the text yields no
        shingles.
    """
    empty_hash = "0" * 16

    words = _tokenize(text) if text else []
    shingles = _create_shingles(words) if words else []
    if not shingles:
        return empty_hash

    # One vote counter per bit position.
    votes = [0] * SIMHASH_BITS
    for shingle in shingles:
        shingle_hash = _hash_shingle(shingle)
        for position in range(SIMHASH_BITS):
            votes[position] += 1 if (shingle_hash >> position) & 1 else -1

    # Assemble the final hash from the positions with a positive vote.
    fingerprint = 0
    for position, vote in enumerate(votes):
        if vote > 0:
            fingerprint |= 1 << position

    return format(fingerprint, "016x")
def hamming_distance(hash1: str, hash2: str) -> int:
    """
    Compute the Hamming distance between two SimHashes.

    The Hamming distance is the number of differing bits; the smaller it is,
    the more similar the texts are.

    Typical thresholds:
    - 0-3: very similar (probably a duplicate)
    - 4-7: similar (same topic)
    - 8+: different

    Returns:
        Number of differing bits. If a hash is empty or not valid hex, the
        maximum distance (SIMHASH_BITS) is returned.
    """
    if hash1 and hash2:
        try:
            differing_bits = int(hash1, 16) ^ int(hash2, 16)
        except ValueError:
            return SIMHASH_BITS
        return bin(differing_bits).count("1")

    return SIMHASH_BITS
def are_similar(hash1: str, hash2: str, threshold: int = 5) -> bool:
    """
    Check whether two hashes indicate similar texts.

    Args:
        hash1: First SimHash.
        hash2: Second SimHash.
        threshold: Maximum Hamming distance still counted as similar.

    Returns:
        True if the Hamming distance does not exceed the threshold.
    """
    distance = hamming_distance(hash1, hash2)
    return distance <= threshold
def find_duplicates(items: list, hash_field: str = "content_hash",
                    threshold: int = 3) -> dict:
    """
    Find duplicates/clusters in a list of items.

    Each item joins the first existing cluster that has a member within
    ``threshold`` bits of its hash; otherwise it starts a new cluster. Items
    without a hash are ignored.

    Args:
        items: Objects carrying the hash as attribute ``hash_field``.
        hash_field: Name of the attribute holding the SimHash.
        threshold: Maximum Hamming distance for duplicate detection.

    Returns:
        Dict {item_id: cluster_id} for items in clusters with more than one
        member.
    """
    members_by_cluster = {}  # cluster_id -> list of items
    cluster_of_item = {}  # item_id -> cluster_id
    next_cluster_number = 0

    for candidate in items:
        candidate_id = getattr(candidate, "id", str(id(candidate)))
        candidate_hash = getattr(candidate, hash_field, None)
        if not candidate_hash:
            continue

        # First cluster (in creation order) with a close-enough member.
        matching_cluster = next(
            (
                cluster_id
                for cluster_id, members in members_by_cluster.items()
                if any(
                    member_hash
                    and hamming_distance(candidate_hash, member_hash) <= threshold
                    for member_hash in (
                        getattr(member, hash_field, None) for member in members
                    )
                )
            ),
            None,
        )

        if matching_cluster:
            members_by_cluster[matching_cluster].append(candidate)
        else:
            # Start a new cluster.
            matching_cluster = f"cluster_{next_cluster_number}"
            next_cluster_number += 1
            members_by_cluster[matching_cluster] = [candidate]
        cluster_of_item[candidate_id] = matching_cluster

    # Single-item clusters are not real duplicates; leave them out.
    return {
        item_id: cluster_id
        for item_id, cluster_id in cluster_of_item.items()
        if len(members_by_cluster[cluster_id]) > 1
    }
def exact_url_duplicates(items: list, url_field: str = "canonical_url") -> set:
    """
    Find exact URL duplicates.

    Items without a URL are ignored. The first item seen for a URL counts as
    the original.

    Returns:
        Set of item IDs that are duplicates (not the original).
    """
    known_urls = set()
    duplicate_ids = set()

    for entry in items:
        entry_url = getattr(entry, url_field, None)
        if not entry_url:
            continue

        if entry_url not in known_urls:
            known_urls.add(entry_url)
            continue

        duplicate_ids.add(getattr(entry, "id", str(id(entry))))

    return duplicate_ids

View File

@@ -0,0 +1,458 @@
"""
Digest Generator fuer Wochenzusammenfassungen.
Generiert LLM-basierte Zusammenfassungen der wichtigsten Alerts:
- Gruppierung nach Wichtigkeit (Kritisch, Dringend, Wichtig, etc.)
- Kurze Zusammenfassung pro Kategorie
- HTML-Ausgabe fuer E-Mail und UI
- PDF-Export
Verwendung:
generator = DigestGenerator(db_session, llm_client)
digest = await generator.generate_weekly_digest(user_id)
"""
import uuid
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
import json
import os
from ..db.models import (
AlertItemDB, AlertDigestDB, UserAlertSubscriptionDB,
ImportanceLevelEnum, DigestStatusEnum
)
@dataclass
class DigestSection:
    """One section of the digest (e.g. critical, urgent)."""
    importance_level: ImportanceLevelEnum  # importance level shared by the section's alerts
    label_de: str  # German display label of the section
    color: str  # colour value; used as CSS background in the HTML output
    items: List[AlertItemDB]  # alerts listed in this section
    summary: str = ""  # summary text; filled in by DigestGenerator._generate_summaries
@dataclass
class DigestContent:
    """Complete content of a digest."""
    user_id: str  # user the digest is generated for
    period_start: datetime  # start of the covered period
    period_end: datetime  # end of the covered period
    sections: List[DigestSection]  # sections grouped by importance
    total_alerts: int  # number of alerts in the period
    critical_count: int  # number of alerts with importance KRITISCH
    urgent_count: int  # number of alerts with importance DRINGEND
    introduction: str = ""  # introduction text; filled in by DigestGenerator._generate_summaries
    html: str = ""  # rendered HTML; set by DigestGenerator.generate_weekly_digest
class DigestGenerator:
"""
Generiert Wochenzusammenfassungen fuer Alerts.
Unterstuetzt:
- Lokale Ollama-Modelle
- OpenAI API
- Anthropic API
"""
def __init__(
self,
db_session,
llm_provider: str = "ollama",
llm_model: str = "llama3.2:3b"
):
"""
Initialisiere den Digest Generator.
Args:
db_session: SQLAlchemy Session
llm_provider: "ollama", "openai", oder "anthropic"
llm_model: Modellname
"""
self.db = db_session
self.llm_provider = llm_provider
self.llm_model = llm_model
async def generate_weekly_digest(
self,
user_id: str,
weeks_back: int = 1
) -> Optional[AlertDigestDB]:
"""
Generiere einen Wochendigest fuer einen User.
Args:
user_id: User-ID
weeks_back: Wie viele Wochen zurueck (default: letzte Woche)
Returns:
AlertDigestDB oder None bei Fehler
"""
# Zeitraum berechnen
now = datetime.utcnow()
period_end = now - timedelta(days=now.weekday()) # Montag dieser Woche
period_start = period_end - timedelta(weeks=weeks_back)
# Alerts laden
alerts = self._load_alerts_for_period(user_id, period_start, period_end)
if not alerts:
return None
# Nach Wichtigkeit gruppieren
sections = self._group_by_importance(alerts)
# Digest-Content erstellen
content = DigestContent(
user_id=user_id,
period_start=period_start,
period_end=period_end,
sections=sections,
total_alerts=len(alerts),
critical_count=len([a for a in alerts if a.importance_level == ImportanceLevelEnum.KRITISCH]),
urgent_count=len([a for a in alerts if a.importance_level == ImportanceLevelEnum.DRINGEND])
)
# LLM-Zusammenfassungen generieren
await self._generate_summaries(content)
# HTML generieren
content.html = self._generate_html(content)
# In DB speichern
digest = self._save_digest(content)
return digest
def _load_alerts_for_period(
    self,
    user_id: str,
    start: datetime,
    end: datetime
) -> List[AlertItemDB]:
    """Load the user's alerts fetched in [start, end), newest first.

    Alerts whose status is "dropped" are excluded.
    """
    conditions = (
        AlertItemDB.user_id == user_id,
        AlertItemDB.fetched_at >= start,
        AlertItemDB.fetched_at < end,
        AlertItemDB.status != "dropped",
    )
    query = self.db.query(AlertItemDB).filter(*conditions)
    newest_first = query.order_by(AlertItemDB.fetched_at.desc())
    return newest_first.all()
def _group_by_importance(
    self,
    alerts: List[AlertItemDB]
) -> List[DigestSection]:
    """Group alerts into sections by importance, most important first.

    Levels without alerts produce no section; each section holds at most
    five alerts.
    """
    # (level, German label, colour), ordered from most to least important.
    level_settings = (
        (ImportanceLevelEnum.KRITISCH, "Kritisch", "#dc2626"),
        (ImportanceLevelEnum.DRINGEND, "Dringend", "#ea580c"),
        (ImportanceLevelEnum.WICHTIG, "Wichtig", "#d97706"),
        (ImportanceLevelEnum.PRUEFEN, "Zu pruefen", "#2563eb"),
        (ImportanceLevelEnum.INFO, "Info", "#64748b"),
    )

    grouped = []
    for level, label, color in level_settings:
        matching = [alert for alert in alerts if alert.importance_level == level]
        if not matching:
            continue
        section = DigestSection(
            importance_level=level,
            label_de=label,
            color=color,
            items=matching[:5]  # at most 5 per category
        )
        grouped.append(section)

    return grouped
async def _generate_summaries(self, content: DigestContent):
    """Fill in the LLM-based introduction and per-section summaries.

    The introduction is generated first, then the sections one after
    another; the results are written onto ``content`` in place.
    """
    introduction = await self._generate_introduction(content)
    content.introduction = introduction

    for digest_section in content.sections:
        section_summary = await self._generate_section_summary(digest_section)
        digest_section.summary = section_summary
async def _generate_introduction(self, content: DigestContent) -> str:
    """Generate the introductory summary of the digest.

    Builds a German prompt containing the period and the alert counts of
    ``content`` and passes it to ``_call_llm`` with a limit of 150 tokens.
    """
    prompt = f"""Du bist ein Assistent fuer Schulleitungen und Lehrkraefte in Deutschland.
Schreibe eine kurze Einleitung (2-3 Saetze) fuer einen Wochenbericht.
Zeitraum: {content.period_start.strftime('%d.%m.%Y')} - {content.period_end.strftime('%d.%m.%Y')}
Gesamt: {content.total_alerts} Meldungen
Kritisch: {content.critical_count}
Dringend: {content.urgent_count}
Schreibe auf Deutsch in einfacher Sprache (B1/B2 Niveau).
Beginne mit "Diese Woche..." oder "In der vergangenen Woche..."."""
    return await self._call_llm(prompt, max_tokens=150)
async def _generate_section_summary(self, section: DigestSection) -> str:
    """Generate the summary of one section.

    Returns "" for a section without items; otherwise the titles of up to
    five items are sent to ``_call_llm`` with a limit of 100 tokens.
    """
    if not section.items:
        return ""

    title_lines = (f"- {item.title}" for item in section.items[:5])
    titles = "\n".join(title_lines)
    prompt = f"""Fasse diese {len(section.items)} Meldungen der Kategorie "{section.label_de}" in 1-2 Saetzen zusammen:
{titles}
Schreibe auf Deutsch in einfacher Sprache. Nenne die wichtigsten Handlungsbedarfe."""
    summary = await self._call_llm(prompt, max_tokens=100)
    return summary
async def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
"""Rufe das LLM auf."""
try:
if self.llm_provider == "ollama":
return await self._call_ollama(prompt, max_tokens)
elif self.llm_provider == "openai":
return await self._call_openai(prompt, max_tokens)
elif self.llm_provider == "anthropic":
return await self._call_anthropic(prompt, max_tokens)
else:
return self._generate_fallback_summary(prompt)
except Exception as e:
print(f"LLM call failed: {e}")
return self._generate_fallback_summary(prompt)
async def _call_ollama(self, prompt: str, max_tokens: int) -> str:
    """Call a local Ollama model.

    Posts a non-streaming generate request to the Ollama HTTP API on
    localhost. Any error or non-200 response yields the fallback summary.
    """
    import httpx

    try:
        request_body = {
            "model": self.llm_model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "num_predict": max_tokens,
                "temperature": 0.7
            }
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "http://localhost:11434/api/generate",
                json=request_body
            )
        if response.status_code == 200:
            return response.json().get("response", "").strip()
    except Exception as e:
        print(f"Ollama error: {e}")

    return self._generate_fallback_summary(prompt)
async def _call_openai(self, prompt: str, max_tokens: int) -> str:
    """Call the OpenAI chat completions API.

    Requires the OPENAI_API_KEY environment variable. A missing key, any
    error or a non-200 response yields the fallback summary.
    """
    import httpx

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return self._generate_fallback_summary(prompt)

    try:
        request_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        request_body = {
            "model": self.llm_model or "gpt-4o-mini",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.7
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.openai.com/v1/chat/completions",
                headers=request_headers,
                json=request_body
            )
        if response.status_code == 200:
            first_choice = response.json()["choices"][0]
            return first_choice["message"]["content"].strip()
    except Exception as e:
        print(f"OpenAI error: {e}")

    return self._generate_fallback_summary(prompt)
async def _call_anthropic(self, prompt: str, max_tokens: int) -> str:
    """Call the Anthropic messages API.

    Requires the ANTHROPIC_API_KEY environment variable. A missing key, any
    error or a non-200 response yields the fallback summary.
    """
    import httpx

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        return self._generate_fallback_summary(prompt)

    try:
        request_headers = {
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "Content-Type": "application/json"
        }
        request_body = {
            "model": self.llm_model or "claude-3-5-sonnet-latest",
            "max_tokens": max_tokens,
            "messages": [{"role": "user", "content": prompt}]
        }
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.anthropic.com/v1/messages",
                headers=request_headers,
                json=request_body
            )
        if response.status_code == 200:
            first_block = response.json()["content"][0]
            return first_block["text"].strip()
    except Exception as e:
        print(f"Anthropic error: {e}")

    return self._generate_fallback_summary(prompt)
def _generate_fallback_summary(self, prompt: str) -> str:
"""Fallback ohne LLM."""
if "Einleitung" in prompt or "Wochenbericht" in prompt:
return "Diese Woche haben Sie neue relevante Meldungen erhalten. Hier ist Ihre Zusammenfassung."
return "Mehrere relevante Meldungen zu diesem Thema."
def _generate_html(self, content: DigestContent) -> str:
    """Render the weekly digest as a standalone HTML document.

    Every text value taken from alerts, sections or generated summaries
    (titles, source names, URLs, labels, colours, summaries, introduction)
    is HTML-escaped before interpolation, so markup inside feed content
    cannot inject tags or break out of an attribute.

    Args:
        content: Digest data. Reads ``sections`` (each with ``items``,
            ``color``, ``label_de`` and ``summary``), ``period_start``,
            ``period_end``, ``total_alerts``, ``critical_count``,
            ``urgent_count`` and ``introduction``.

    Returns:
        The complete HTML page as a string.
    """
    from html import escape

    def esc(value) -> str:
        # quote=True keeps values safe inside double-quoted attributes too.
        return escape(str(value), quote=True)

    section_blocks = []
    for section in content.sections:
        rows = []
        for item in section.items:
            rows.append(f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e2e8f0;">
                        <div style="font-weight: 500; color: #1e293b; margin-bottom: 4px;">{esc(item.title)}</div>
                        <div style="font-size: 12px; color: #64748b;">{esc(item.source_name or 'Unbekannt')}</div>
                    </td>
                    <td style="padding: 12px; border-bottom: 1px solid #e2e8f0; text-align: right;">
                        <a href="{esc(item.url or '#')}" style="color: #3b82f6; text-decoration: none;">Oeffnen</a>
                    </td>
                </tr>
            """)
        items_html = "".join(rows)

        # The summary paragraph is omitted entirely when there is no summary.
        summary_html = ""
        if section.summary:
            summary_html = (
                '<p style="font-size: 14px; color: #475569; margin-bottom: 12px;">'
                f"{esc(section.summary)}</p>"
            )

        section_blocks.append(f"""
            <div style="margin-bottom: 24px;">
                <div style="display: flex; align-items: center; margin-bottom: 12px;">
                    <span style="display: inline-block; width: 12px; height: 12px; background: {esc(section.color)}; border-radius: 50%; margin-right: 8px;"></span>
                    <h3 style="margin: 0; font-size: 18px; color: #1e293b;">{esc(section.label_de)}</h3>
                    <span style="margin-left: 8px; font-size: 14px; color: #64748b;">({len(section.items)} Meldungen)</span>
                </div>
                {summary_html}
                <table style="width: 100%; border-collapse: collapse;">
                    {items_html}
                </table>
            </div>
        """)
    sections_html = "".join(section_blocks)

    introduction_html = ""
    if content.introduction:
        introduction_html = (
            '<p style="font-size: 15px; color: #334155; line-height: 1.6; margin-bottom: 24px;">'
            f"{esc(content.introduction)}</p>"
        )

    period_start = content.period_start.strftime('%d.%m.%Y')
    period_end = content.period_end.strftime('%d.%m.%Y')

    return f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Wochenbericht - BreakPilot Alerts</title>
</head>
<body style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 700px; margin: 0 auto; padding: 20px; background: #f8fafc;">
    <div style="background: white; border-radius: 12px; padding: 32px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
        <!-- Header -->
        <div style="text-align: center; margin-bottom: 32px; padding-bottom: 24px; border-bottom: 1px solid #e2e8f0;">
            <h1 style="margin: 0 0 8px 0; font-size: 24px; color: #1e293b;">Wochenbericht</h1>
            <p style="margin: 0; color: #64748b; font-size: 14px;">
                {period_start} - {period_end}
            </p>
        </div>
        <!-- Stats -->
        <div style="display: flex; justify-content: center; gap: 32px; margin-bottom: 32px; padding: 16px; background: #f1f5f9; border-radius: 8px;">
            <div style="text-align: center;">
                <div style="font-size: 28px; font-weight: 700; color: #1e293b;">{content.total_alerts}</div>
                <div style="font-size: 12px; color: #64748b;">Gesamt</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 28px; font-weight: 700; color: #dc2626;">{content.critical_count}</div>
                <div style="font-size: 12px; color: #64748b;">Kritisch</div>
            </div>
            <div style="text-align: center;">
                <div style="font-size: 28px; font-weight: 700; color: #ea580c;">{content.urgent_count}</div>
                <div style="font-size: 12px; color: #64748b;">Dringend</div>
            </div>
        </div>
        <!-- Introduction -->
        {introduction_html}
        <!-- Sections -->
        {sections_html}
        <!-- Footer -->
        <div style="margin-top: 32px; padding-top: 24px; border-top: 1px solid #e2e8f0; text-align: center; font-size: 12px; color: #94a3b8;">
            <p>Dieser Bericht wurde automatisch von BreakPilot Alerts erstellt.</p>
            <p><a href="#" style="color: #3b82f6; text-decoration: none;">Einstellungen anpassen</a> | <a href="#" style="color: #3b82f6; text-decoration: none;">Abmelden</a></p>
        </div>
    </div>
</body>
</html>
"""
def _save_digest(self, content: DigestContent) -> AlertDigestDB:
    """Persist a generated digest and return the stored row.

    The digest is linked to the user's first active subscription when one
    exists, carries per-importance item counts derived from the sections,
    and is created with status ``PENDING``.

    Args:
        content: Generated digest content including the rendered HTML.

    Returns:
        The committed and refreshed ``AlertDigestDB`` instance.
    """
    def items_at(level) -> int:
        # Number of items across all sections carrying the given level.
        return sum(
            len(section.items)
            for section in content.sections
            if section.importance_level == level
        )

    # Look up the subscription for this user.
    # NOTE(review): `== True` is presumably a SQLAlchemy column expression,
    # so it must not be rewritten to `is True` — confirm against the models.
    active_subscription = (
        self.db.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.user_id == content.user_id,
            UserAlertSubscriptionDB.is_active == True
        )
        .first()
    )
    subscription_id = active_subscription.id if active_subscription else None

    record = AlertDigestDB(
        id=str(uuid.uuid4()),
        subscription_id=subscription_id,
        user_id=content.user_id,
        period_start=content.period_start,
        period_end=content.period_end,
        summary_html=content.html,
        total_alerts=content.total_alerts,
        critical_count=content.critical_count,
        urgent_count=content.urgent_count,
        important_count=items_at(ImportanceLevelEnum.WICHTIG),
        review_count=items_at(ImportanceLevelEnum.PRUEFEN),
        info_count=items_at(ImportanceLevelEnum.INFO),
        status=DigestStatusEnum.PENDING
    )

    session = self.db
    session.add(record)
    session.commit()
    session.refresh(record)
    return record
async def generate_digest_for_all_users(db_session) -> int:
    """Generate weekly digests for every active, digest-enabled subscription.

    Intended to be invoked by a scheduler (e.g. Celery, APScheduler).
    A failure for one user is printed and does not stop the remaining users.

    Args:
        db_session: Database session used for the query and the generator.

    Returns:
        Number of digests that were generated.
    """
    # All active subscriptions that have the digest switched on.
    eligible_subscriptions = (
        db_session.query(UserAlertSubscriptionDB)
        .filter(
            UserAlertSubscriptionDB.is_active == True,
            UserAlertSubscriptionDB.digest_enabled == True
        )
        .all()
    )

    digest_generator = DigestGenerator(db_session)
    generated = 0
    for subscription in eligible_subscriptions:
        try:
            result = await digest_generator.generate_weekly_digest(subscription.user_id)
            if result:
                generated += 1
        except Exception as e:
            print(f"Error generating digest for user {subscription.user_id}: {e}")
    return generated

View File

@@ -0,0 +1,341 @@
"""
Importance Mapping für Guided Mode.
Konvertiert Relevanz-Scores (0.0-1.0) in 5-stufige Wichtigkeitsstufen:
- KRITISCH (90-100%): Sofortiges Handeln erforderlich
- DRINGEND (75-90%): Wichtig, bald handeln
- WICHTIG (60-75%): Beachtenswert
- PRÜFEN (40-60%): Eventuell relevant
- INFO (0-40%): Zur Kenntnisnahme
Zusätzlich: Generierung von "Warum relevant?"-Erklärungen und nächsten Schritten.
"""
from typing import Optional, List, Dict, Any
from datetime import datetime, timedelta
import re
from ..db.models import ImportanceLevelEnum, AlertItemDB
# Public names of this module; ImportanceLevelEnum is re-exported so callers
# can import it from here instead of from the db models.
__all__ = [
    'ImportanceLevelEnum',
    'score_to_importance',
    'importance_to_label_de',
    'importance_to_color',
    'extract_deadline',
    'generate_why_relevant',
    'generate_next_steps',
    'enrich_alert_for_guided_mode',
    'batch_enrich_alerts',
    'filter_by_importance',
]
# Default score thresholds for the importance mapping, keyed by level name.
DEFAULT_THRESHOLDS = {
    "kritisch": 0.90,
    "dringend": 0.75,
    "wichtig": 0.60,
    "pruefen": 0.40,
}
def score_to_importance(
    score: float,
    thresholds: Dict[str, float] = None
) -> ImportanceLevelEnum:
    """Map a relevance score to an importance level.

    Args:
        score: Relevance score (0.0 - 1.0); ``None`` maps to INFO.
        thresholds: Optional custom lower bounds keyed by "kritisch",
            "dringend", "wichtig" and "pruefen". Missing keys use the
            built-in values; a falsy mapping selects DEFAULT_THRESHOLDS.

    Returns:
        The highest level whose threshold the score reaches, else INFO.
    """
    if score is None:
        return ImportanceLevelEnum.INFO

    limits = thresholds or DEFAULT_THRESHOLDS
    # Ordered from the highest to the lowest level; the first hit wins.
    ladder = (
        ("kritisch", 0.90, ImportanceLevelEnum.KRITISCH),
        ("dringend", 0.75, ImportanceLevelEnum.DRINGEND),
        ("wichtig", 0.60, ImportanceLevelEnum.WICHTIG),
        ("pruefen", 0.40, ImportanceLevelEnum.PRUEFEN),
    )
    for key, fallback, level in ladder:
        if score >= limits.get(key, fallback):
            return level
    return ImportanceLevelEnum.INFO
def importance_to_label_de(importance: ImportanceLevelEnum) -> str:
    """Return the German display label for an importance level.

    Values that are not a known level yield ``"Info"``.
    """
    return {
        ImportanceLevelEnum.KRITISCH: "Kritisch",
        ImportanceLevelEnum.DRINGEND: "Dringend",
        ImportanceLevelEnum.WICHTIG: "Wichtig",
        ImportanceLevelEnum.PRUEFEN: "Zu prüfen",
        ImportanceLevelEnum.INFO: "Info",
    }.get(importance, "Info")
def importance_to_color(importance: ImportanceLevelEnum) -> str:
    """Return the colour name (Tailwind-compatible) for an importance level.

    Values that are not a known level yield ``"slate"``.
    """
    return {
        ImportanceLevelEnum.KRITISCH: "red",
        ImportanceLevelEnum.DRINGEND: "orange",
        ImportanceLevelEnum.WICHTIG: "amber",
        ImportanceLevelEnum.PRUEFEN: "blue",
        ImportanceLevelEnum.INFO: "slate",
    }.get(importance, "slate")
# Numeric German date patterns; groups are (day, month, optional year).
_DEADLINE_NUMERIC_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{4})",
        r"Frist[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{4})",
        r"(?:Anmelde|Bewerbungs)schluss[:\s]+(\d{1,2})\.(\d{1,2})\.?(?:(\d{4}))?",
        r"endet\s+am\s+(\d{1,2})\.(\d{1,2})\.(\d{4})?",
    )
)

# German month names (lower-case) mapped to their month number.
_GERMAN_MONTHS = {
    "januar": 1,
    "jänner": 1,
    "februar": 2,
    "märz": 3,
    "maerz": 3,
    "april": 4,
    "mai": 5,
    "juni": 6,
    "juli": 7,
    "august": 8,
    "september": 9,
    "oktober": 10,
    "november": 11,
    "dezember": 12,
}

# Same lead-in phrases as above, followed by "<day>. <month name> [year]".
_DEADLINE_MONTH_NAME_PATTERN = re.compile(
    r"(?:bis\s+(?:zum\s+)?|Frist[:\s]+|(?:Anmelde|Bewerbungs)schluss[:\s]+|endet\s+am\s+)"
    r"(\d{1,2})\.\s*(" + "|".join(_GERMAN_MONTHS) + r")\b(?:\s+(\d{4}))?",
    re.IGNORECASE,
)


def extract_deadline(text: str) -> Optional[datetime]:
    """Extract a deadline date from German text.

    Recognised forms, checked in this order:
    - "bis zum 15.03.2026" / "bis 15.03.2026"
    - "Frist: 15.03.2026"
    - "Anmeldeschluss: 30.11." / "Bewerbungsschluss: 30.11.2026"
    - "endet am 15.03.2026"
    - any of the lead-ins above with a month name, e.g. "Frist: 1. April"

    Numeric patterns are tried first, so texts they already matched keep
    their result. A missing year defaults to the current year. Matches that
    do not form a valid calendar date (e.g. 31.02.) are skipped.

    Args:
        text: Text to search; empty or ``None`` yields ``None``.

    Returns:
        The deadline at midnight, or ``None`` if no valid date was found.
    """
    if not text:
        return None

    for pattern in _DEADLINE_NUMERIC_PATTERNS:
        match = pattern.search(text)
        if not match:
            continue
        day, month, year = match.groups()
        try:
            return datetime(
                int(year) if year else datetime.now().year,
                int(month),
                int(day),
            )
        except ValueError:
            # Not a real calendar date; try the next pattern.
            continue

    match = _DEADLINE_MONTH_NAME_PATTERN.search(text)
    if match:
        day, month_name, year = match.groups()
        try:
            return datetime(
                int(year) if year else datetime.now().year,
                _GERMAN_MONTHS[month_name.lower()],
                int(day),
            )
        except ValueError:
            return None
    return None
def generate_why_relevant(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    matched_keywords: List[str] = None
) -> str:
    """Build the German "why is this relevant?" explanation for an alert.

    Reasons are collected in this order: deadline, matched keywords, profile
    priorities (first two only), high relevance score. At most the first two
    reasons are returned, separated by " • ". A generic reason is used when
    nothing else applies.

    Args:
        alert: The alert; reads ``title``, ``snippet`` and ``relevance_score``.
        profile_priorities: Priorities from the user profile, each a dict
            with optional "label" and "keywords".
        matched_keywords: Keywords that matched; the first three are listed.

    Returns:
        One or two reasons as a single string.
    """
    reasons = []
    # A missing title/snippet must not leak the literal "None" into matching.
    text = f"{alert.title or ''} {alert.snippet or ''}"

    # Deadline-based relevance
    deadline = extract_deadline(text)
    if deadline:
        # Compare calendar dates. Subtracting full datetimes truncated the
        # difference, so a deadline tomorrow was reported as today/expired.
        days_until = (deadline.date() - datetime.now().date()).days
        if days_until <= 0:
            reasons.append("Frist abgelaufen oder heute!")
        elif days_until <= 7:
            unit = "Tag" if days_until == 1 else "Tagen"
            reasons.append(f"Frist endet in {days_until} {unit}")
        elif days_until <= 30:
            reasons.append(f"Frist in ca. {days_until} Tagen")

    # Keyword-based relevance
    if matched_keywords:
        keywords_str = ", ".join(matched_keywords[:3])
        reasons.append(f"Enthält relevante Begriffe: {keywords_str}")

    # Priority-based relevance
    if profile_priorities:
        text_lower = text.lower()
        for priority in profile_priorities[:2]:
            label = priority.get("label", "")
            keywords = priority.get("keywords") or []
            if any(kw.lower() in text_lower for kw in keywords):
                reasons.append(f"Passt zu Ihrem Interesse: {label}")

    # Score-based relevance
    if alert.relevance_score and alert.relevance_score >= 0.8:
        reasons.append("Hohe Übereinstimmung mit Ihrem Profil")

    # Fallback
    if not reasons:
        reasons.append("Passt zu Ihren ausgewählten Themen")

    # An empty separator used to glue two reasons into one unreadable run.
    return " • ".join(reasons[:2])
def generate_next_steps(
    alert: AlertItemDB,
    template_slug: str = None
) -> List[str]:
    """Suggest up to three next steps for an alert.

    Steps depend on the active template and on terms found in the alert's
    title and snippet. When the template yields nothing, generic steps are
    returned instead.

    Args:
        alert: The alert; reads ``title`` and ``snippet``.
        template_slug: Slug of the active template, if any.

    Returns:
        At most three German step descriptions.
    """
    steps = []
    # A missing title/snippet must not leak the literal "None" into matching.
    text = f"{alert.title or ''} {alert.snippet or ''}".lower()

    def mentions(*terms: str) -> bool:
        return any(term in text for term in terms)

    # Template-specific steps
    if template_slug == "foerderprogramme":
        if mentions("antrag", "förder"):
            steps.append("Schulträger über Fördermöglichkeit informieren")
            steps.append("Antragsunterlagen prüfen")
        if mentions("frist", "deadline"):
            steps.append("Termin in Kalender eintragen")
    elif template_slug == "datenschutz-recht":
        if mentions("dsgvo", "datenschutz"):
            steps.append("Datenschutzbeauftragten informieren")
            steps.append("Prüfen, ob Handlungsbedarf besteht")
        if mentions("urteil", "gericht"):
            steps.append("Rechtsfolgen für die Schule prüfen")
    elif template_slug == "it-security":
        if mentions("cve", "sicherheitslücke"):
            steps.append("Betroffene Systeme prüfen")
            steps.append("Update/Patch einspielen")
        if mentions("phishing"):
            steps.append("Kollegium warnen")
            steps.append("Erkennungsmerkmale kommunizieren")
    elif template_slug == "abitur-updates":
        if mentions("abitur", "prüfung"):
            steps.append("Fachschaften informieren")
            steps.append("Anpassung der Kursplanung prüfen")
    elif template_slug == "fortbildungen":
        steps.append("Termin und Ort prüfen")
        steps.append("Bei Interesse: Anmeldung vornehmen")
    elif template_slug == "wettbewerbe-projekte":
        steps.append("Passende Schülergruppe identifizieren")
        steps.append("Anmeldefrist beachten")

    # Generic steps as fallback
    if not steps:
        steps.append("Quelle öffnen und Details lesen")
        # "bis" must be a whole word; a plain substring test also fired on
        # words such as "Ergebnis" or "bisher".
        if "frist" in text or re.search(r"\bbis\b", text):
            steps.append("Termin notieren")

    return steps[:3]  # At most 3 steps
def enrich_alert_for_guided_mode(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> AlertItemDB:
    """Fill in the guided-mode fields of an alert in place.

    Sets ``importance_level``, ``why_relevant`` and ``next_steps``, and sets
    ``action_deadline`` only when a deadline is found in title/snippet.

    Args:
        alert: The alert to enrich.
        profile_priorities: Priorities from the user profile.
        template_slug: Slug of the active template.
        importance_thresholds: Optional custom importance thresholds.

    Returns:
        The same alert object, enriched.
    """
    level = score_to_importance(alert.relevance_score, importance_thresholds)
    alert.importance_level = level

    explanation = generate_why_relevant(alert, profile_priorities)
    alert.why_relevant = explanation

    recommended_steps = generate_next_steps(alert, template_slug)
    alert.next_steps = recommended_steps

    # An existing action_deadline is left untouched when nothing is found.
    found_deadline = extract_deadline(f"{alert.title} {alert.snippet}")
    if found_deadline:
        alert.action_deadline = found_deadline

    return alert
def batch_enrich_alerts(
    alerts: List[AlertItemDB],
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> List[AlertItemDB]:
    """Enrich several alerts for guided mode.

    Each alert is passed through ``enrich_alert_for_guided_mode`` with the
    same settings; the results are returned in input order.
    """
    enriched = []
    for current in alerts:
        enriched.append(
            enrich_alert_for_guided_mode(
                current,
                profile_priorities,
                template_slug,
                importance_thresholds
            )
        )
    return enriched
def filter_by_importance(
    alerts: List[AlertItemDB],
    min_level: ImportanceLevelEnum = ImportanceLevelEnum.INFO,
    max_count: int = 10
) -> List[AlertItemDB]:
    """Keep alerts at or above a minimum importance and cap the result.

    The result is ordered from most to least important; alerts of equal
    importance keep their input order. Unknown levels rank like INFO.

    Args:
        alerts: Alerts to filter.
        min_level: Lowest importance level to keep.
        max_count: Maximum number of alerts to return.

    Returns:
        The filtered, sorted and truncated list.
    """
    # Importance ranking (higher = more important)
    rank_of = {
        ImportanceLevelEnum.KRITISCH: 5,
        ImportanceLevelEnum.DRINGEND: 4,
        ImportanceLevelEnum.WICHTIG: 3,
        ImportanceLevelEnum.PRUEFEN: 2,
        ImportanceLevelEnum.INFO: 1,
    }

    def rank(alert) -> int:
        return rank_of.get(alert.importance_level, 1)

    threshold = rank_of.get(min_level, 1)
    selected = sorted(
        (candidate for candidate in alerts if rank(candidate) >= threshold),
        key=rank,
        reverse=True,
    )
    return selected[:max_count]

View File

@@ -0,0 +1,390 @@
"""
Relevance Scorer für Alerts.
Nutzt das LLM Gateway um Alerts auf Relevanz zu prüfen.
Berücksichtigt das Nutzerprofil für personalisierte Filterung.
"""
import json
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Optional
import httpx
from ..models.alert_item import AlertItem, AlertStatus
from ..models.relevance_profile import RelevanceProfile
logger = logging.getLogger(__name__)
class RelevanceDecision(str, Enum):
    """Relevance decision for an alert."""
    KEEP = "KEEP"  # Relevant; show in the inbox
    DROP = "DROP"  # Irrelevant; archive automatically
    REVIEW = "REVIEW"  # Uncertain; the user should decide
@dataclass
class ScoringResult:
    """Outcome of scoring a single alert for relevance."""
    alert_id: str
    score: float  # 0.0 - 1.0
    decision: RelevanceDecision
    reason_codes: list = field(default_factory=list)
    summary: Optional[str] = None
    entities: dict = field(default_factory=dict)  # Extracted entities
    model_version: str = ""
    prompt_version: str = "1.0"
    scored_at: datetime = field(default_factory=datetime.utcnow)
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict view of the result.

        The decision is reduced to its ``value`` and ``scored_at`` to its
        ISO-8601 string; all other fields are passed through unchanged.
        """
        decision_value = self.decision.value
        scored_at_iso = self.scored_at.isoformat()
        return dict(
            alert_id=self.alert_id,
            score=self.score,
            decision=decision_value,
            reason_codes=self.reason_codes,
            summary=self.summary,
            entities=self.entities,
            model_version=self.model_version,
            prompt_version=self.prompt_version,
            scored_at=scored_at_iso,
            error=self.error,
        )
# System prompt for relevance scoring. It asks the model to answer with a
# bare JSON object (score, decision, reason_codes, summary, entities).
# The prompt text is runtime data sent to the model and stays in German.
RELEVANCE_SYSTEM_PROMPT = """Du bist ein Relevanz-Filter für News-Alerts. Deine Aufgabe ist es, zu bewerten, ob ein Alert für den Nutzer relevant ist.
## Deine Aufgaben:
1. Analysiere den Alert-Titel und Snippet
2. Berücksichtige das Nutzerprofil (Prioritäten, Ausschlüsse, Beispiele)
3. Gib eine Relevanz-Bewertung ab
## Bewertungskriterien:
- **KEEP** (Score 0.7-1.0): Alert ist klar relevant für die Prioritäten des Nutzers
- **REVIEW** (Score 0.4-0.7): Möglicherweise relevant, Nutzer sollte entscheiden
- **DROP** (Score 0.0-0.4): Nicht relevant, kann ignoriert werden
## Ausschluss-Gründe (automatisch DROP):
- Stellenanzeigen, Werbung, Pressemitteilungen (außer hochrelevant)
- Duplicate/sehr ähnliche Meldung zu kürzlichem Alert
- Thema in Ausschlussliste des Nutzers
## Output-Format:
Du MUSST mit einem JSON-Objekt antworten (keine Markdown-Codeblöcke, nur das JSON):
{
"score": 0.85,
"decision": "KEEP",
"reason_codes": ["matches_priority_inklusion", "recent_news"],
"summary": "Kurze Zusammenfassung des Alerts (1-2 Sätze)",
"entities": {
"topics": ["Inklusion", "Bayern"],
"organizations": ["Kultusministerium"],
"date_context": "aktuell"
}
}
Wichtig:
- score ist eine Zahl zwischen 0.0 und 1.0
- decision ist entweder "KEEP", "DROP" oder "REVIEW"
- reason_codes sind kurze, maschinenlesbare Codes
- summary ist auf Deutsch
"""
class RelevanceScorer:
    """Score alerts for relevance through the LLM gateway.

    Each alert (plus an optional user profile) is sent to the gateway's
    chat completions endpoint; the JSON answer is converted into a
    ``ScoringResult`` and written back onto the alert.
    """

    def __init__(
        self,
        gateway_url: str = "http://localhost:8000/llm",
        api_key: str = "",
        model: str = "breakpilot-teacher-8b",
        timeout: int = 30,
    ):
        """Initialise the scorer.

        Args:
            gateway_url: Base URL of the LLM gateway; a trailing "/" is stripped.
            api_key: API key sent in the ``X-API-Key`` header.
            model: Model name used for scoring.
            timeout: HTTP timeout handed to the httpx client.
        """
        self.gateway_url = gateway_url.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.timeout = timeout
        self._client: Optional[httpx.AsyncClient] = None
        # Thresholds used when the model's decision string is unusable.
        self.keep_threshold = 0.7
        self.drop_threshold = 0.4

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={
                    "X-API-Key": self.api_key,
                    "Content-Type": "application/json",
                },
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client, if one was created."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _build_user_prompt(self, alert: AlertItem) -> str:
        """Build the user prompt describing one alert."""
        parts = [
            "## Alert zu bewerten\n",
            f"**Thema-Label:** {alert.topic_label}",
            f"**Titel:** {alert.title}",
        ]
        if alert.snippet:
            # Limit the snippet to 500 characters.
            snippet = alert.snippet[:500]
            if len(alert.snippet) > 500:
                snippet += "..."
            parts.append(f"**Snippet:** {snippet}")
        if alert.url:
            parts.append(f"**URL:** {alert.url}")
        if alert.published_at:
            parts.append(f"**Veröffentlicht:** {alert.published_at.strftime('%Y-%m-%d')}")
        parts.append("\nBewerte diesen Alert und antworte NUR mit dem JSON-Objekt.")
        return "\n".join(parts)

    def _build_system_prompt(self, profile: Optional[RelevanceProfile] = None) -> str:
        """Return the system prompt, extended by the profile context if given."""
        system = RELEVANCE_SYSTEM_PROMPT
        if profile:
            system += "\n\n" + profile.get_prompt_context()
        return system

    def _decision_from_score(self, score: float) -> RelevanceDecision:
        """Derive a decision from the score using the configured thresholds."""
        if score >= self.keep_threshold:
            return RelevanceDecision.KEEP
        if score <= self.drop_threshold:
            return RelevanceDecision.DROP
        return RelevanceDecision.REVIEW

    def _parse_response(self, text: str, alert_id: str) -> ScoringResult:
        """Parse the LLM answer into a ``ScoringResult``.

        The first "{" through the last "}" of the text is parsed as JSON, so
        answers wrapped in Markdown code fences are accepted. The score is
        clamped to 0..1; a missing, null or NaN score becomes 0.5. A decision
        that is not one of KEEP/DROP/REVIEW (including null or non-string
        values) is replaced by one derived from the score.

        Args:
            text: Raw model output.
            alert_id: ID of the alert the answer belongs to.

        Returns:
            The parsed result. On failure, a REVIEW result with score 0.5
            and ``error`` set.
        """
        try:
            # The LLM sometimes wraps the JSON in Markdown code blocks.
            json_match = re.search(r"\{[\s\S]*\}", text)
            if not json_match:
                raise ValueError("Kein JSON in Response gefunden")
            data = json.loads(json_match.group())

            raw_score = data.get("score", 0.5)
            score = 0.5 if raw_score is None else float(raw_score)
            if score != score:
                # NaN (json.loads accepts it) would otherwise clamp to 1.0.
                score = 0.5
            score = max(0.0, min(1.0, score))  # Clamp to 0-1

            # str() guards against null/numeric decisions, which previously
            # raised and threw away an otherwise valid score.
            decision_str = str(data.get("decision", "REVIEW")).strip().upper()
            try:
                decision = RelevanceDecision(decision_str)
            except ValueError:
                decision = self._decision_from_score(score)

            return ScoringResult(
                alert_id=alert_id,
                score=score,
                decision=decision,
                reason_codes=data.get("reason_codes") or [],
                summary=data.get("summary"),
                entities=data.get("entities") or {},
                model_version=self.model,
            )
        except json.JSONDecodeError as e:
            logger.warning("JSON Parse Error für Alert %s: %s", alert_id, e)
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["parse_error"],
                error=f"JSON parse error: {str(e)}",
                model_version=self.model,
            )
        except Exception as e:
            logger.error("Unexpected error parsing response: %s", e)
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_alert(
        self,
        alert: AlertItem,
        profile: Optional[RelevanceProfile] = None,
    ) -> ScoringResult:
        """Score a single alert and write the outcome back onto it.

        On success the alert's relevance fields are updated and its status
        is set to SCORED. On gateway or unexpected errors the alert is left
        unchanged and a REVIEW result with ``error`` set is returned.

        Args:
            alert: The alert to score.
            profile: Optional user profile for personalised scoring.

        Returns:
            The scoring result.
        """
        try:
            client = await self._get_client()
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": self._build_system_prompt(profile)},
                    {"role": "user", "content": self._build_user_prompt(alert)},
                ],
                "temperature": 0.3,  # Low for more consistent results
                "max_tokens": 500,
            }
            response = await client.post(
                f"{self.gateway_url}/v1/chat/completions",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()
            content = data["choices"][0]["message"]["content"]
            result = self._parse_response(content, alert.id)

            # Update the alert
            alert.relevance_score = result.score
            alert.relevance_decision = result.decision.value
            alert.relevance_reasons = result.reason_codes
            alert.relevance_summary = result.summary
            alert.status = AlertStatus.SCORED
            return result
        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP {e.response.status_code}: {e.response.text[:200]}"
            logger.error("Gateway Error für Alert %s: %s", alert.id, error_msg)
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["gateway_error"],
                error=error_msg,
                model_version=self.model,
            )
        except Exception as e:
            logger.exception("Scoring Error für Alert %s", alert.id)
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_batch(
        self,
        alerts: list[AlertItem],
        profile: Optional[RelevanceProfile] = None,
        skip_scored: bool = True,
    ) -> list[ScoringResult]:
        """Score several alerts one after another.

        Args:
            alerts: Alerts to score.
            profile: User profile.
            skip_scored: Skip alerts whose status is already SCORED.

        Returns:
            Results for the alerts that were actually scored, in input order.
        """
        results = []
        for alert in alerts:
            if skip_scored and alert.status == AlertStatus.SCORED:
                logger.debug("Alert %s bereits bewertet, überspringe", alert.id)
                continue
            results.append(await self.score_alert(alert, profile))
        return results

    def get_stats(self, results: list[ScoringResult]) -> dict:
        """Summarise scoring results.

        Returns:
            ``{"total": 0}`` for an empty list; otherwise counts per decision,
            the error count, keep/drop rates and the average score.
        """
        total = len(results)
        if total == 0:
            return {"total": 0}
        keep = sum(1 for r in results if r.decision == RelevanceDecision.KEEP)
        drop = sum(1 for r in results if r.decision == RelevanceDecision.DROP)
        review = sum(1 for r in results if r.decision == RelevanceDecision.REVIEW)
        errors = sum(1 for r in results if r.error)
        avg_score = sum(r.score for r in results) / total
        return {
            "total": total,
            "keep": keep,
            "drop": drop,
            "review": review,
            "errors": errors,
            "keep_rate": keep / total,
            "drop_rate": drop / total,
            "avg_score": avg_score,
        }
# Module-wide singleton instance
_scorer_instance: Optional[RelevanceScorer] = None


def get_relevance_scorer(
    gateway_url: str = "http://localhost:8000/llm",
    api_key: str = "",
    model: str = "breakpilot-teacher-8b",
) -> RelevanceScorer:
    """Return the shared RelevanceScorer, creating it on first use.

    The arguments are only used by the call that creates the instance;
    later calls return the existing instance unchanged.
    """
    global _scorer_instance
    if _scorer_instance is not None:
        return _scorer_instance
    _scorer_instance = RelevanceScorer(
        gateway_url=gateway_url,
        api_key=api_key,
        model=model,
    )
    return _scorer_instance

View File

@@ -0,0 +1,512 @@
"""
Rule Engine für Alerts Agent.
Evaluiert Regeln gegen Alert-Items und führt Aktionen aus.
Regel-Struktur:
- Bedingungen: [{field, operator, value}, ...] (AND-verknüpft)
- Aktion: keep, drop, tag, email, webhook, slack
- Priorität: Höhere Priorität wird zuerst evaluiert
"""
import re
import logging
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Callable
from enum import Enum
from alerts_agent.db.models import AlertItemDB, AlertRuleDB, RuleActionEnum
logger = logging.getLogger(__name__)
class ConditionOperator(str, Enum):
    """Operators available for rule conditions.

    The values are the operator names used in stored condition dicts.
    """
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    EQUALS = "equals"
    NOT_EQUALS = "not_equals"
    STARTS_WITH = "starts_with"
    ENDS_WITH = "ends_with"
    REGEX = "regex"
    GREATER_THAN = "gt"
    LESS_THAN = "lt"
    GREATER_EQUAL = "gte"
    LESS_EQUAL = "lte"
    IN_LIST = "in"
    NOT_IN_LIST = "not_in"
@dataclass
class RuleCondition:
    """A single rule condition."""
    field: str  # "title", "snippet", "url", "source", "relevance_score"
    operator: ConditionOperator
    value: Any  # str, float, list

    @classmethod
    def from_dict(cls, data: Dict) -> "RuleCondition":
        """Build a condition from its dict form.

        The operator is read from "operator", falling back to the alias "op"
        and finally to "contains". Missing "field" and "value" default to "".

        Raises:
            ValueError: If the operator name is not a ConditionOperator value.
        """
        alias_or_default = data.get("op", "contains")
        operator_name = data.get("operator", alias_or_default)
        field_name = data.get("field", "")
        expected = data.get("value", "")
        return cls(
            field=field_name,
            operator=ConditionOperator(operator_name),
            value=expected,
        )
@dataclass
class RuleMatch:
    """Result of evaluating one rule against an alert."""
    rule_id: str
    rule_name: str
    matched: bool
    action: RuleActionEnum
    action_config: Dict[str, Any]
    conditions_met: List[str]  # Which conditions matched
def get_field_value(alert: AlertItemDB, field: str) -> Any:
    """Look up the value of a rule field on an alert.

    Args:
        alert: Alert item.
        field: Field name as used in rule conditions.

    Returns:
        The field value, or ``None`` for an unknown field name. The fields
        "source", "status" and "relevance_decision" are returned as their
        ``.value``, or "" when unset.
    """
    def value_or_blank(member) -> str:
        return member.value if member else ""

    values = {
        "title": alert.title,
        "snippet": alert.snippet,
        "url": alert.url,
        "source": value_or_blank(alert.source),
        "status": value_or_blank(alert.status),
        "relevance_score": alert.relevance_score,
        "relevance_decision": value_or_blank(alert.relevance_decision),
        "lang": alert.lang,
        "topic_id": alert.topic_id,
    }
    return values.get(field)
def _match_text(op, text_value: str, target) -> bool:
    """Apply a string operator case-insensitively; other operators give False."""
    haystack = text_value.lower()
    needle = str(target).lower() if isinstance(target, str) else target

    if op == ConditionOperator.CONTAINS:
        return needle in haystack
    if op == ConditionOperator.NOT_CONTAINS:
        return needle not in haystack
    if op == ConditionOperator.EQUALS:
        return haystack == needle
    if op == ConditionOperator.NOT_EQUALS:
        return haystack != needle
    if op == ConditionOperator.STARTS_WITH:
        return haystack.startswith(needle)
    if op == ConditionOperator.ENDS_WITH:
        return haystack.endswith(needle)
    if op == ConditionOperator.REGEX:
        try:
            return bool(re.search(str(target), text_value, re.IGNORECASE))
        except re.error:
            logger.warning(f"Invalid regex pattern: {target}")
            return False
    if op == ConditionOperator.IN_LIST:
        # True when any string entry occurs as a substring of the field.
        return isinstance(target, list) and any(
            t.lower() in haystack for t in target if isinstance(t, str)
        )
    if op == ConditionOperator.NOT_IN_LIST:
        # A non-list target can never be "in" the field, hence True.
        return not (
            isinstance(target, list)
            and any(t.lower() in haystack for t in target if isinstance(t, str))
        )
    return False


def _match_number(op, number, target) -> bool:
    """Apply a numeric comparison operator; other operators give False."""
    threshold = float(target) if target else 0

    if op == ConditionOperator.EQUALS:
        return number == threshold
    if op == ConditionOperator.NOT_EQUALS:
        return number != threshold
    if op == ConditionOperator.GREATER_THAN:
        return number > threshold
    if op == ConditionOperator.LESS_THAN:
        return number < threshold
    if op == ConditionOperator.GREATER_EQUAL:
        return number >= threshold
    if op == ConditionOperator.LESS_EQUAL:
        return number <= threshold
    return False


def evaluate_condition(
    alert: AlertItemDB,
    condition: RuleCondition,
) -> bool:
    """Evaluate a single condition against an alert.

    String fields are compared case-insensitively; numeric fields use the
    comparison operators. A field without a value, an operator that does
    not apply to the field's type, or any error during evaluation makes
    the condition unmet.

    Args:
        alert: Alert item.
        condition: Condition to evaluate.

    Returns:
        True if the condition is met.
    """
    field_value = get_field_value(alert, condition.field)
    if field_value is None:
        return False

    op = condition.operator
    target = condition.value
    try:
        if isinstance(field_value, str):
            return _match_text(op, field_value, target)
        if isinstance(field_value, (int, float)):
            return _match_number(op, field_value, target)
    except Exception as e:
        logger.error(f"Error evaluating condition: {e}")
        return False
    return False
def evaluate_rule(
    alert: AlertItemDB,
    rule: AlertRuleDB,
) -> RuleMatch:
    """Evaluate one rule against an alert.

    All conditions must be met (AND). A rule without conditions always
    matches. A malformed stored condition (e.g. unknown operator or a
    non-dict entry) is logged and counts as unmet, so one broken rule no
    longer raises and aborts evaluation of the remaining alerts and rules.

    Args:
        alert: Alert item.
        rule: Rule to evaluate.

    Returns:
        The RuleMatch result; ``conditions_met`` lists the conditions that
        were satisfied.
    """
    conditions = rule.conditions or []
    conditions_met = []
    all_matched = True

    for cond_dict in conditions:
        try:
            condition = RuleCondition.from_dict(cond_dict)
        except (ValueError, TypeError, AttributeError) as exc:
            logger.warning(
                "Skipping malformed condition %r in rule %r: %s",
                cond_dict,
                rule.name,
                exc,
            )
            all_matched = False
            continue

        if evaluate_condition(alert, condition):
            conditions_met.append(
                f"{condition.field} {condition.operator.value} {condition.value}"
            )
        else:
            all_matched = False

    return RuleMatch(
        rule_id=rule.id,
        rule_name=rule.name,
        matched=all_matched,
        action=rule.action_type,
        action_config=rule.action_config or {},
        conditions_met=conditions_met,
    )
def evaluate_rules_for_alert(
    alert: AlertItemDB,
    rules: List[AlertRuleDB],
) -> Optional[RuleMatch]:
    """Return the first rule match for an alert, if any.

    Rules are checked in the given order, so the caller should pass them
    sorted by priority (descending). Inactive rules and rules bound to a
    different topic are skipped.

    Args:
        alert: Alert item.
        rules: Candidate rules.

    Returns:
        The first RuleMatch, or None when no rule matches.
    """
    for candidate in rules:
        # Skip inactive rules and rules restricted to another topic.
        if not candidate.is_active or (
            candidate.topic_id and candidate.topic_id != alert.topic_id
        ):
            continue

        outcome = evaluate_rule(alert, candidate)
        if not outcome.matched:
            continue

        logger.debug(
            f"Rule '{candidate.name}' matched alert '{alert.id[:8]}': "
            f"{outcome.conditions_met}"
        )
        return outcome
    return None
class RuleEngine:
    """Apply the stored alert rules to one or many alerts.

    Active rules are loaded once per engine instance and cached until
    ``clear_cache`` is called.
    """

    def __init__(self, db_session):
        """Initialise the rule engine.

        Args:
            db_session: SQLAlchemy session.
        """
        self.db = db_session
        self._rules_cache: Optional[List[AlertRuleDB]] = None

    def _get_active_rules(self) -> List[AlertRuleDB]:
        """Load the active rules from the database (cached)."""
        if self._rules_cache is None:
            from alerts_agent.db.repository import RuleRepository
            self._rules_cache = RuleRepository(self.db).get_active()
        return self._rules_cache

    def clear_cache(self) -> None:
        """Drop the cached rules so the next call reloads them."""
        self._rules_cache = None

    def process_alert(
        self,
        alert: AlertItemDB,
    ) -> Optional[RuleMatch]:
        """Evaluate all active rules against one alert.

        Args:
            alert: Alert item.

        Returns:
            The first RuleMatch, or None when no rule matches.
        """
        return evaluate_rules_for_alert(alert, self._get_active_rules())

    def process_alerts(
        self,
        alerts: List[AlertItemDB],
    ) -> Dict[str, RuleMatch]:
        """Evaluate all active rules against several alerts.

        Args:
            alerts: Alert items.

        Returns:
            Mapping of alert id to RuleMatch, for matched alerts only.
        """
        rules = self._get_active_rules()
        results = {}
        for alert in alerts:
            match = evaluate_rules_for_alert(alert, rules)
            if match:
                results[alert.id] = match
        return results

    def apply_rule_actions(
        self,
        alert: AlertItemDB,
        match: RuleMatch,
    ) -> Dict[str, Any]:
        """Apply the action of a matched rule to an alert.

        KEEP/DROP write a scoring decision, TAG merges the configured tags
        into the alert's user tags, and EMAIL/WEBHOOK/SLACK only return
        their config marked as ``deferred`` for a later stage to send. The
        rule's match counter is incremented unless an error occurred.

        Args:
            alert: Alert item.
            match: RuleMatch carrying the action and its config.

        Returns:
            Dict with "action", "success" and action-specific keys; "error"
            is set when applying the action failed.
        """
        from alerts_agent.db.repository import AlertItemRepository, RuleRepository

        alert_repo = AlertItemRepository(self.db)
        rule_repo = RuleRepository(self.db)
        action = match.action
        config = match.action_config
        result = {"action": action.value, "success": False}

        try:
            if action == RuleActionEnum.KEEP:
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=1.0,
                    decision="KEEP",
                    reasons=["rule_match"],
                    summary=f"Matched rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True
            elif action == RuleActionEnum.DROP:
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=0.0,
                    decision="DROP",
                    reasons=["rule_match"],
                    summary=f"Dropped by rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True
            elif action == RuleActionEnum.TAG:
                tags = config.get("tags", [])
                if tags:
                    existing_tags = alert.user_tags or []
                    # Order-preserving de-duplication; set() produced a
                    # different tag order from run to run.
                    new_tags = list(dict.fromkeys([*existing_tags, *tags]))
                    alert_repo.update(alert.id, user_tags=new_tags)
                    result["tags_added"] = tags
                    result["success"] = True
            elif action == RuleActionEnum.EMAIL:
                # Sending is left to a later stage.
                result["email_config"] = config
                result["success"] = True
                result["deferred"] = True
            elif action == RuleActionEnum.WEBHOOK:
                # Calling the webhook is left to a later stage.
                result["webhook_config"] = config
                result["success"] = True
                result["deferred"] = True
            elif action == RuleActionEnum.SLACK:
                # Posting to Slack is left to a later stage.
                result["slack_config"] = config
                result["success"] = True
                result["deferred"] = True

            # Count the match
            rule_repo.increment_match_count(match.rule_id)
        except Exception as e:
            logger.error("Error applying rule action: %s", e)
            result["error"] = str(e)
        return result
# Convenience helpers for building common rule definitions
def create_keyword_rule(
    name: str,
    keywords: List[str],
    action: str = "keep",
    field: str = "title",
) -> Dict:
    """Build a keyword-based rule definition.

    Args:
        name: Rule name.
        keywords: Keywords, OR-combined through the "in" operator.
        action: Action (keep, drop, tag).
        field: Field to check (title, snippet, url).

    Returns:
        The rule definition as a dict.
    """
    keyword_condition = dict(field=field, operator="in", value=keywords)
    return dict(
        name=name,
        conditions=[keyword_condition],
        action_type=action,
        action_config={},
    )
def create_exclusion_rule(
    name: str,
    excluded_terms: List[str],
    field: str = "title",
) -> Dict:
    """Build a rule definition that drops alerts containing excluded terms.

    Args:
        name: Rule name.
        excluded_terms: Terms to exclude.
        field: Field to check.

    Returns:
        The rule definition as a dict, with action type "drop".
    """
    exclusion_condition = dict(field=field, operator="in", value=excluded_terms)
    return dict(
        name=name,
        conditions=[exclusion_condition],
        action_type="drop",
        action_config={},
    )
def create_score_threshold_rule(
    name: str,
    min_score: float,
    action: str = "keep",
) -> Dict:
    """Build a rule definition that fires at or above a relevance score.

    Args:
        name: Rule name.
        min_score: Minimum score.
        action: Action taken when the score is reached.

    Returns:
        The rule definition as a dict.
    """
    score_condition = dict(field="relevance_score", operator="gte", value=min_score)
    return dict(
        name=name,
        conditions=[score_condition],
        action_type=action,
        action_config={},
    )