fix: Restore all files lost during destructive rebase
A previous `git pull --rebase origin main` dropped 177 local commits,
losing 3400+ files across admin-v2, backend, studio-v2, website,
klausur-service, and many other services. The partial restore attempt
(660295e2) only recovered some files.
This commit restores all missing files from pre-rebase ref 98933f5e
while preserving post-rebase additions (night-scheduler, night-mode UI,
NightModeWidget dashboard integration).
Restored features include:
- AI Module Sidebar (FAB), OCR Labeling, OCR Compare
- GPU Dashboard, RAG Pipeline, Magic Help
- Klausur-Korrektur (8 files), Abitur-Archiv (5+ files)
- Companion, Zeugnisse-Crawler, Screen Flow
- Full backend, studio-v2, website, klausur-service
- All compliance SDKs, agent-core, voice-service
- CI/CD configs, documentation, scripts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
12
backend/alerts_agent/processing/__init__.py
Normal file
12
backend/alerts_agent/processing/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""Alert Processing Modules."""
|
||||
|
||||
from .dedup import compute_simhash, hamming_distance, find_duplicates
|
||||
from .relevance_scorer import RelevanceScorer, ScoringResult
|
||||
|
||||
__all__ = [
|
||||
"compute_simhash",
|
||||
"hamming_distance",
|
||||
"find_duplicates",
|
||||
"RelevanceScorer",
|
||||
"ScoringResult",
|
||||
]
|
||||
239
backend/alerts_agent/processing/dedup.py
Normal file
239
backend/alerts_agent/processing/dedup.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""
|
||||
Deduplizierung für Alerts.
|
||||
|
||||
Nutzt SimHash für Fuzzy-Matching von ähnlichen Texten.
|
||||
SimHash ist ein Locality-Sensitive Hash, bei dem ähnliche Texte
|
||||
ähnliche Hashes produzieren.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from typing import Optional
|
||||
from collections import Counter
|
||||
|
||||
|
||||
# SimHash Parameter
|
||||
SIMHASH_BITS = 64
|
||||
SHINGLE_SIZE = 3 # Anzahl aufeinanderfolgender Wörter
|
||||
|
||||
|
||||
def _tokenize(text: str) -> list:
|
||||
"""
|
||||
Tokenisiere Text in normalisierte Wörter.
|
||||
|
||||
- Lowercase
|
||||
- Nur alphanumerische Zeichen
|
||||
- Stoppwörter entfernen (deutsche)
|
||||
"""
|
||||
# Deutsche Stoppwörter (häufige Wörter ohne semantischen Wert)
|
||||
STOPWORDS = {
|
||||
"der", "die", "das", "den", "dem", "des", "ein", "eine", "einer", "eines",
|
||||
"und", "oder", "aber", "doch", "wenn", "weil", "dass", "als", "auch",
|
||||
"ist", "sind", "war", "waren", "wird", "werden", "wurde", "wurden",
|
||||
"hat", "haben", "hatte", "hatten", "kann", "können", "konnte", "konnten",
|
||||
"für", "von", "mit", "bei", "nach", "aus", "über", "unter", "vor", "hinter",
|
||||
"auf", "an", "in", "im", "am", "um", "bis", "durch", "ohne", "gegen",
|
||||
"nicht", "noch", "nur", "schon", "sehr", "mehr", "sich", "es", "sie", "er",
|
||||
"wir", "ihr", "ich", "du", "man", "so", "wie", "was", "wer", "wo", "wann",
|
||||
}
|
||||
|
||||
# Normalisiere
|
||||
text = text.lower()
|
||||
# Nur Buchstaben, Zahlen und Umlaute
|
||||
text = re.sub(r"[^a-zäöüß0-9\s]", " ", text)
|
||||
# Tokenisiere
|
||||
words = text.split()
|
||||
# Filtere Stoppwörter und kurze Wörter
|
||||
words = [w for w in words if w not in STOPWORDS and len(w) > 2]
|
||||
|
||||
return words
|
||||
|
||||
|
||||
def _create_shingles(words: list, size: int = SHINGLE_SIZE) -> list:
    """
    Build overlapping word n-grams (shingles) from a word list.

    Example: ["a", "b", "c", "d"] with size=2 -> ["a b", "b c", "c d"]

    Args:
        words: Token list produced by _tokenize().
        size: Number of consecutive words per shingle.

    Returns:
        List of shingle strings; a single joined shingle when there are
        fewer than ``size`` words, and [] for an empty input.
    """
    if len(words) < size:
        # Not enough words for one full shingle: fall back to one shingle
        # made of everything we have (or nothing for empty input).
        return [" ".join(words)] if words else []

    shingles = []
    for start in range(len(words) - size + 1):
        shingles.append(" ".join(words[start:start + size]))
    return shingles
|
||||
|
||||
|
||||
def _hash_shingle(shingle: str) -> int:
|
||||
"""Hash ein Shingle zu einer 64-bit Zahl."""
|
||||
# Nutze MD5 und nimm erste 8 Bytes (64 bit)
|
||||
h = hashlib.md5(shingle.encode()).digest()[:8]
|
||||
return int.from_bytes(h, byteorder="big")
|
||||
|
||||
|
||||
def compute_simhash(text: str) -> str:
    """
    Compute the SimHash of a text.

    SimHash works as follows:
      1. Split the text into shingles (word n-grams).
      2. Hash every shingle.
      3. For each bit of each hash: +1 if the bit is set, otherwise -1.
      4. Sum the contributions over all shingle hashes.
      5. The final hash has bit=1 wherever the sum is positive.

    Returns:
        16-character hex string (64 bit); all zeros for empty or
        stopword-only input.
    """
    empty_hash = "0" * 16
    if not text:
        return empty_hash

    tokens = _tokenize(text)
    if not tokens:
        return empty_hash

    shingles = _create_shingles(tokens)
    if not shingles:
        return empty_hash

    # One signed counter per bit position: positive means "more shingle
    # hashes have this bit set than cleared".
    counters = [0] * SIMHASH_BITS
    for shingle in shingles:
        shingle_hash = _hash_shingle(shingle)
        for bit_pos in range(SIMHASH_BITS):
            counters[bit_pos] += 1 if (shingle_hash >> bit_pos) & 1 else -1

    # Collapse the counters into the final 64-bit fingerprint.
    result = 0
    for bit_pos, count in enumerate(counters):
        if count > 0:
            result |= 1 << bit_pos

    return format(result, "016x")
|
||||
|
||||
|
||||
def hamming_distance(hash1: str, hash2: str) -> int:
    """
    Hamming distance between two SimHashes (number of differing bits).

    The smaller the distance, the more similar the underlying texts.
    Typical thresholds:
      - 0-3: very similar (likely a duplicate)
      - 4-7: similar (same topic)
      - 8+:  different

    Returns:
        Number of differing bits (0-64). Missing or malformed hex input
        yields SIMHASH_BITS, i.e. "maximally different".
    """
    if not hash1 or not hash2:
        return SIMHASH_BITS

    try:
        xor = int(hash1, 16) ^ int(hash2, 16)
    except ValueError:
        # Not valid hex -> treat as maximally distant rather than raising.
        return SIMHASH_BITS

    return bin(xor).count("1")


def are_similar(hash1: str, hash2: str, threshold: int = 5) -> bool:
    """
    Check whether two SimHashes point to similar texts.

    Args:
        hash1: First SimHash.
        hash2: Second SimHash.
        threshold: Maximum Hamming distance still counted as similar.

    Returns:
        True if the texts are probably similar.
    """
    return hamming_distance(hash1, hash2) <= threshold
|
||||
|
||||
|
||||
def find_duplicates(items: list, hash_field: str = "content_hash",
|
||||
threshold: int = 3) -> dict:
|
||||
"""
|
||||
Finde Duplikate/Cluster in einer Liste von Items.
|
||||
|
||||
Args:
|
||||
items: Liste von Objekten mit hash_field Attribut
|
||||
hash_field: Name des Attributs das den SimHash enthält
|
||||
threshold: Max Hamming-Distanz für Duplikat-Erkennung
|
||||
|
||||
Returns:
|
||||
Dict mit {item_id: cluster_id} für Duplikate
|
||||
"""
|
||||
clusters = {} # cluster_id -> list of items
|
||||
item_to_cluster = {} # item_id -> cluster_id
|
||||
|
||||
cluster_counter = 0
|
||||
|
||||
for item in items:
|
||||
item_id = getattr(item, "id", str(id(item)))
|
||||
item_hash = getattr(item, hash_field, None)
|
||||
|
||||
if not item_hash:
|
||||
continue
|
||||
|
||||
# Suche nach existierendem Cluster
|
||||
found_cluster = None
|
||||
for cluster_id, cluster_items in clusters.items():
|
||||
for existing_item in cluster_items:
|
||||
existing_hash = getattr(existing_item, hash_field, None)
|
||||
if existing_hash and hamming_distance(item_hash, existing_hash) <= threshold:
|
||||
found_cluster = cluster_id
|
||||
break
|
||||
if found_cluster:
|
||||
break
|
||||
|
||||
if found_cluster:
|
||||
clusters[found_cluster].append(item)
|
||||
item_to_cluster[item_id] = found_cluster
|
||||
else:
|
||||
# Neuen Cluster starten
|
||||
cluster_id = f"cluster_{cluster_counter}"
|
||||
cluster_counter += 1
|
||||
clusters[cluster_id] = [item]
|
||||
item_to_cluster[item_id] = cluster_id
|
||||
|
||||
# Filtere Single-Item Cluster (keine echten Duplikate)
|
||||
duplicates = {}
|
||||
for item_id, cluster_id in item_to_cluster.items():
|
||||
if len(clusters[cluster_id]) > 1:
|
||||
duplicates[item_id] = cluster_id
|
||||
|
||||
return duplicates
|
||||
|
||||
|
||||
def exact_url_duplicates(items: list, url_field: str = "canonical_url") -> set:
    """
    Find exact URL duplicates.

    The first item seen with a given URL counts as the original; every
    later item with the same URL is reported as a duplicate. Items with a
    missing/empty URL are ignored.

    Returns:
        Set of item ids that are duplicates (never the original's id).
    """
    first_seen = {}      # url -> id of the first item carrying that url
    duplicate_ids = set()

    for entry in items:
        entry_id = getattr(entry, "id", str(id(entry)))
        url = getattr(entry, url_field, None)

        if not url:
            continue

        if url in first_seen:
            duplicate_ids.add(entry_id)
        else:
            first_seen[url] = entry_id

    return duplicate_ids
|
||||
458
backend/alerts_agent/processing/digest_generator.py
Normal file
458
backend/alerts_agent/processing/digest_generator.py
Normal file
@@ -0,0 +1,458 @@
|
||||
"""
|
||||
Digest Generator fuer Wochenzusammenfassungen.
|
||||
|
||||
Generiert LLM-basierte Zusammenfassungen der wichtigsten Alerts:
|
||||
- Gruppierung nach Wichtigkeit (Kritisch, Dringend, Wichtig, etc.)
|
||||
- Kurze Zusammenfassung pro Kategorie
|
||||
- HTML-Ausgabe fuer E-Mail und UI
|
||||
- PDF-Export
|
||||
|
||||
Verwendung:
|
||||
generator = DigestGenerator(db_session, llm_client)
|
||||
digest = await generator.generate_weekly_digest(user_id)
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import os
|
||||
|
||||
from ..db.models import (
|
||||
AlertItemDB, AlertDigestDB, UserAlertSubscriptionDB,
|
||||
ImportanceLevelEnum, DigestStatusEnum
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class DigestSection:
    """One section of the digest (e.g. Critical, Urgent)."""
    importance_level: ImportanceLevelEnum  # severity bucket of this section
    label_de: str  # German display label, e.g. "Kritisch"
    color: str     # hex color used when rendering the section header
    items: List[AlertItemDB]  # alerts grouped into this section
    summary: str = ""  # optional LLM-generated summary text for the section
||||
|
||||
|
||||
@dataclass
class DigestContent:
    """Complete digest content assembled before persisting/rendering."""
    user_id: str               # owner of the digest
    period_start: datetime     # inclusive start of the reporting period
    period_end: datetime       # exclusive end of the reporting period
    sections: List[DigestSection]  # non-empty importance sections, ordered
    total_alerts: int          # alert count over the whole period
    critical_count: int        # alerts at level KRITISCH
    urgent_count: int          # alerts at level DRINGEND
    introduction: str = ""     # LLM-generated intro paragraph (may stay empty)
    html: str = ""             # rendered HTML, filled in by _generate_html()
||||
|
||||
|
||||
class DigestGenerator:
    """
    Generates weekly digest summaries for alerts.

    Supported LLM backends:
    - local Ollama models
    - OpenAI API
    - Anthropic API

    Any backend failure degrades gracefully to a static fallback text, so
    digest generation never depends on an LLM being reachable.
    """

    def __init__(
        self,
        db_session,
        llm_provider: str = "ollama",
        llm_model: str = "llama3.2:3b"
    ):
        """
        Initialize the digest generator.

        Args:
            db_session: SQLAlchemy session
            llm_provider: "ollama", "openai", or "anthropic"
            llm_model: model name passed to the chosen provider
        """
        self.db = db_session
        self.llm_provider = llm_provider
        self.llm_model = llm_model

    async def generate_weekly_digest(
        self,
        user_id: str,
        weeks_back: int = 1
    ) -> Optional[AlertDigestDB]:
        """
        Generate a weekly digest for one user.

        Args:
            user_id: user id
            weeks_back: how many weeks to look back (default: last week)

        Returns:
            Persisted AlertDigestDB, or None when the period has no alerts.
        """
        # Compute the reporting period ending on Monday of the current week.
        # NOTE(review): datetime.utcnow() is naive and deprecated since
        # Python 3.12; consider datetime.now(timezone.utc) — confirm how the
        # DB columns store timezones before changing.
        now = datetime.utcnow()
        period_end = now - timedelta(days=now.weekday())  # Monday of this week
        period_start = period_end - timedelta(weeks=weeks_back)

        # Load alerts for the period.
        alerts = self._load_alerts_for_period(user_id, period_start, period_end)

        if not alerts:
            return None

        # Group by importance level.
        sections = self._group_by_importance(alerts)

        # Assemble the in-memory digest content.
        content = DigestContent(
            user_id=user_id,
            period_start=period_start,
            period_end=period_end,
            sections=sections,
            total_alerts=len(alerts),
            critical_count=len([a for a in alerts if a.importance_level == ImportanceLevelEnum.KRITISCH]),
            urgent_count=len([a for a in alerts if a.importance_level == ImportanceLevelEnum.DRINGEND])
        )

        # Generate LLM summaries (introduction + per-section texts).
        await self._generate_summaries(content)

        # Render the HTML body.
        content.html = self._generate_html(content)

        # Persist to the database.
        digest = self._save_digest(content)

        return digest

    def _load_alerts_for_period(
        self,
        user_id: str,
        start: datetime,
        end: datetime
    ) -> List[AlertItemDB]:
        """Load all non-dropped alerts for a user within [start, end)."""
        return self.db.query(AlertItemDB).filter(
            AlertItemDB.user_id == user_id,
            AlertItemDB.fetched_at >= start,
            AlertItemDB.fetched_at < end,
            AlertItemDB.status != "dropped"
        ).order_by(AlertItemDB.fetched_at.desc()).all()

    def _group_by_importance(
        self,
        alerts: List[AlertItemDB]
    ) -> List[DigestSection]:
        """Group alerts into sections by importance level (empty levels skipped)."""
        # Fixed display order and colors, most severe first.
        importance_config = [
            (ImportanceLevelEnum.KRITISCH, "Kritisch", "#dc2626"),
            (ImportanceLevelEnum.DRINGEND, "Dringend", "#ea580c"),
            (ImportanceLevelEnum.WICHTIG, "Wichtig", "#d97706"),
            (ImportanceLevelEnum.PRUEFEN, "Zu pruefen", "#2563eb"),
            (ImportanceLevelEnum.INFO, "Info", "#64748b"),
        ]

        sections = []
        for level, label, color in importance_config:
            items = [a for a in alerts if a.importance_level == level]
            if items:
                sections.append(DigestSection(
                    importance_level=level,
                    label_de=label,
                    color=color,
                    items=items[:5]  # cap at 5 items per category
                ))

        return sections

    async def _generate_summaries(self, content: DigestContent):
        """Fill in LLM-based summaries (mutates `content` in place)."""
        # Introduction paragraph for the whole digest.
        content.introduction = await self._generate_introduction(content)

        # One short summary per section.
        for section in content.sections:
            section.summary = await self._generate_section_summary(section)

    async def _generate_introduction(self, content: DigestContent) -> str:
        """Generate the introductory summary paragraph (German, plain language)."""
        prompt = f"""Du bist ein Assistent fuer Schulleitungen und Lehrkraefte in Deutschland.
Schreibe eine kurze Einleitung (2-3 Saetze) fuer einen Wochenbericht.

Zeitraum: {content.period_start.strftime('%d.%m.%Y')} - {content.period_end.strftime('%d.%m.%Y')}
Gesamt: {content.total_alerts} Meldungen
Kritisch: {content.critical_count}
Dringend: {content.urgent_count}

Schreibe auf Deutsch in einfacher Sprache (B1/B2 Niveau).
Beginne mit "Diese Woche..." oder "In der vergangenen Woche..."."""

        return await self._call_llm(prompt, max_tokens=150)

    async def _generate_section_summary(self, section: DigestSection) -> str:
        """Generate a 1-2 sentence summary for one section (German)."""
        if not section.items:
            return ""

        titles = "\n".join([f"- {item.title}" for item in section.items[:5]])

        prompt = f"""Fasse diese {len(section.items)} Meldungen der Kategorie "{section.label_de}" in 1-2 Saetzen zusammen:

{titles}

Schreibe auf Deutsch in einfacher Sprache. Nenne die wichtigsten Handlungsbedarfe."""

        return await self._call_llm(prompt, max_tokens=100)

    async def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
        """Dispatch to the configured LLM provider; fall back to static text on error."""
        try:
            if self.llm_provider == "ollama":
                return await self._call_ollama(prompt, max_tokens)
            elif self.llm_provider == "openai":
                return await self._call_openai(prompt, max_tokens)
            elif self.llm_provider == "anthropic":
                return await self._call_anthropic(prompt, max_tokens)
            else:
                # Unknown provider: degrade silently to the fallback text.
                return self._generate_fallback_summary(prompt)
        except Exception as e:
            print(f"LLM call failed: {e}")
            return self._generate_fallback_summary(prompt)

    async def _call_ollama(self, prompt: str, max_tokens: int) -> str:
        """Call a local Ollama model via its HTTP generate endpoint."""
        import httpx

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "http://localhost:11434/api/generate",
                    json={
                        "model": self.llm_model,
                        "prompt": prompt,
                        "stream": False,
                        "options": {
                            "num_predict": max_tokens,
                            "temperature": 0.7
                        }
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data.get("response", "").strip()
        except Exception as e:
            print(f"Ollama error: {e}")

        # Non-200 responses and transport errors fall through to the fallback.
        return self._generate_fallback_summary(prompt)

    async def _call_openai(self, prompt: str, max_tokens: int) -> str:
        """Call the OpenAI chat completions API (requires OPENAI_API_KEY)."""
        import httpx

        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            return self._generate_fallback_summary(prompt)

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "https://api.openai.com/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.llm_model or "gpt-4o-mini",
                        "messages": [{"role": "user", "content": prompt}],
                        "max_tokens": max_tokens,
                        "temperature": 0.7
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data["choices"][0]["message"]["content"].strip()
        except Exception as e:
            print(f"OpenAI error: {e}")

        return self._generate_fallback_summary(prompt)

    async def _call_anthropic(self, prompt: str, max_tokens: int) -> str:
        """Call the Anthropic messages API (requires ANTHROPIC_API_KEY)."""
        import httpx

        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            return self._generate_fallback_summary(prompt)

        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    "https://api.anthropic.com/v1/messages",
                    headers={
                        "x-api-key": api_key,
                        "anthropic-version": "2023-06-01",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": self.llm_model or "claude-3-5-sonnet-latest",
                        "max_tokens": max_tokens,
                        "messages": [{"role": "user", "content": prompt}]
                    }
                )
                if response.status_code == 200:
                    data = response.json()
                    return data["content"][0]["text"].strip()
        except Exception as e:
            print(f"Anthropic error: {e}")

        return self._generate_fallback_summary(prompt)

    def _generate_fallback_summary(self, prompt: str) -> str:
        """Static German fallback text used when no LLM is available."""
        # Crude prompt sniffing: introduction prompts contain these markers.
        if "Einleitung" in prompt or "Wochenbericht" in prompt:
            return "Diese Woche haben Sie neue relevante Meldungen erhalten. Hier ist Ihre Zusammenfassung."
        return "Mehrere relevante Meldungen zu diesem Thema."

    def _generate_html(self, content: DigestContent) -> str:
        """Render the digest as a self-contained, inline-styled HTML document."""
        sections_html = ""

        for section in content.sections:
            items_html = ""
            for item in section.items:
                items_html += f"""
                <tr>
                    <td style="padding: 12px; border-bottom: 1px solid #e2e8f0;">
                        <div style="font-weight: 500; color: #1e293b; margin-bottom: 4px;">{item.title}</div>
                        <div style="font-size: 12px; color: #64748b;">{item.source_name or 'Unbekannt'}</div>
                    </td>
                    <td style="padding: 12px; border-bottom: 1px solid #e2e8f0; text-align: right;">
                        <a href="{item.url or '#'}" style="color: #3b82f6; text-decoration: none;">Oeffnen</a>
                    </td>
                </tr>
                """

            sections_html += f"""
            <div style="margin-bottom: 24px;">
                <div style="display: flex; align-items: center; margin-bottom: 12px;">
                    <span style="display: inline-block; width: 12px; height: 12px; background: {section.color}; border-radius: 50%; margin-right: 8px;"></span>
                    <h3 style="margin: 0; font-size: 18px; color: #1e293b;">{section.label_de}</h3>
                    <span style="margin-left: 8px; font-size: 14px; color: #64748b;">({len(section.items)} Meldungen)</span>
                </div>
                {f'<p style="font-size: 14px; color: #475569; margin-bottom: 12px;">{section.summary}</p>' if section.summary else ''}
                <table style="width: 100%; border-collapse: collapse;">
                    {items_html}
                </table>
            </div>
            """

        return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <title>Wochenbericht - BreakPilot Alerts</title>
        </head>
        <body style="font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 700px; margin: 0 auto; padding: 20px; background: #f8fafc;">
            <div style="background: white; border-radius: 12px; padding: 32px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
                <!-- Header -->
                <div style="text-align: center; margin-bottom: 32px; padding-bottom: 24px; border-bottom: 1px solid #e2e8f0;">
                    <h1 style="margin: 0 0 8px 0; font-size: 24px; color: #1e293b;">Wochenbericht</h1>
                    <p style="margin: 0; color: #64748b; font-size: 14px;">
                        {content.period_start.strftime('%d.%m.%Y')} - {content.period_end.strftime('%d.%m.%Y')}
                    </p>
                </div>

                <!-- Stats -->
                <div style="display: flex; justify-content: center; gap: 32px; margin-bottom: 32px; padding: 16px; background: #f1f5f9; border-radius: 8px;">
                    <div style="text-align: center;">
                        <div style="font-size: 28px; font-weight: 700; color: #1e293b;">{content.total_alerts}</div>
                        <div style="font-size: 12px; color: #64748b;">Gesamt</div>
                    </div>
                    <div style="text-align: center;">
                        <div style="font-size: 28px; font-weight: 700; color: #dc2626;">{content.critical_count}</div>
                        <div style="font-size: 12px; color: #64748b;">Kritisch</div>
                    </div>
                    <div style="text-align: center;">
                        <div style="font-size: 28px; font-weight: 700; color: #ea580c;">{content.urgent_count}</div>
                        <div style="font-size: 12px; color: #64748b;">Dringend</div>
                    </div>
                </div>

                <!-- Introduction -->
                {f'<p style="font-size: 15px; color: #334155; line-height: 1.6; margin-bottom: 24px;">{content.introduction}</p>' if content.introduction else ''}

                <!-- Sections -->
                {sections_html}

                <!-- Footer -->
                <div style="margin-top: 32px; padding-top: 24px; border-top: 1px solid #e2e8f0; text-align: center; font-size: 12px; color: #94a3b8;">
                    <p>Dieser Bericht wurde automatisch von BreakPilot Alerts erstellt.</p>
                    <p><a href="#" style="color: #3b82f6; text-decoration: none;">Einstellungen anpassen</a> | <a href="#" style="color: #3b82f6; text-decoration: none;">Abmelden</a></p>
                </div>
            </div>
        </body>
        </html>
        """

    def _save_digest(self, content: DigestContent) -> AlertDigestDB:
        """Persist the digest to the database and return the refreshed row."""
        # Find the active subscription for this user; a digest can exist
        # without one (subscription_id is then NULL).
        subscription = self.db.query(UserAlertSubscriptionDB).filter(
            UserAlertSubscriptionDB.user_id == content.user_id,
            UserAlertSubscriptionDB.is_active == True
        ).first()

        digest = AlertDigestDB(
            id=str(uuid.uuid4()),
            subscription_id=subscription.id if subscription else None,
            user_id=content.user_id,
            period_start=content.period_start,
            period_end=content.period_end,
            summary_html=content.html,
            total_alerts=content.total_alerts,
            critical_count=content.critical_count,
            urgent_count=content.urgent_count,
            # Per-level counts are derived from the (already capped) section
            # item lists, not the raw alert list.
            important_count=sum(len(s.items) for s in content.sections if s.importance_level == ImportanceLevelEnum.WICHTIG),
            review_count=sum(len(s.items) for s in content.sections if s.importance_level == ImportanceLevelEnum.PRUEFEN),
            info_count=sum(len(s.items) for s in content.sections if s.importance_level == ImportanceLevelEnum.INFO),
            status=DigestStatusEnum.PENDING
        )

        self.db.add(digest)
        self.db.commit()
        self.db.refresh(digest)

        return digest
|
||||
|
||||
|
||||
async def generate_digest_for_all_users(db_session) -> int:
    """
    Generate digests for all active subscriptions.

    Intended to be invoked by a scheduler (e.g. Celery, APScheduler).

    Args:
        db_session: SQLAlchemy session shared by all generations.

    Returns:
        Number of digests actually generated (users without alerts in the
        period produce no digest and are not counted).
    """
    # All active subscriptions that have the digest feature enabled.
    subscriptions = db_session.query(UserAlertSubscriptionDB).filter(
        UserAlertSubscriptionDB.is_active == True,
        UserAlertSubscriptionDB.digest_enabled == True
    ).all()

    generator = DigestGenerator(db_session)
    count = 0

    for sub in subscriptions:
        try:
            digest = await generator.generate_weekly_digest(sub.user_id)
            if digest:
                count += 1
        except Exception as e:
            # Best effort: one failing user must not abort the whole run.
            print(f"Error generating digest for user {sub.user_id}: {e}")

    return count
|
||||
341
backend/alerts_agent/processing/importance.py
Normal file
341
backend/alerts_agent/processing/importance.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
Importance Mapping für Guided Mode.
|
||||
|
||||
Konvertiert Relevanz-Scores (0.0-1.0) in 5-stufige Wichtigkeitsstufen:
|
||||
- KRITISCH (90-100%): Sofortiges Handeln erforderlich
|
||||
- DRINGEND (75-90%): Wichtig, bald handeln
|
||||
- WICHTIG (60-75%): Beachtenswert
|
||||
- PRÜFEN (40-60%): Eventuell relevant
|
||||
- INFO (0-40%): Zur Kenntnisnahme
|
||||
|
||||
Zusätzlich: Generierung von "Warum relevant?"-Erklärungen und nächsten Schritten.
|
||||
"""
|
||||
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
|
||||
from ..db.models import ImportanceLevelEnum, AlertItemDB
|
||||
|
||||
# Re-export fuer einfacheren Import
|
||||
__all__ = [
|
||||
'ImportanceLevelEnum',
|
||||
'score_to_importance',
|
||||
'importance_to_label_de',
|
||||
'importance_to_color',
|
||||
'extract_deadline',
|
||||
'generate_why_relevant',
|
||||
'generate_next_steps',
|
||||
'enrich_alert_for_guided_mode',
|
||||
'batch_enrich_alerts',
|
||||
'filter_by_importance',
|
||||
]
|
||||
|
||||
|
||||
# Standard-Schwellenwerte für Importance-Mapping
|
||||
DEFAULT_THRESHOLDS = {
|
||||
"kritisch": 0.90,
|
||||
"dringend": 0.75,
|
||||
"wichtig": 0.60,
|
||||
"pruefen": 0.40,
|
||||
}
|
||||
|
||||
|
||||
def score_to_importance(
    score: float,
    thresholds: Optional[Dict[str, float]] = None
) -> ImportanceLevelEnum:
    """
    Convert a relevance score to an importance level.

    Args:
        score: Relevance score (0.0 - 1.0); None maps to INFO.
        thresholds: Optional custom thresholds overriding
            DEFAULT_THRESHOLDS (keys: "kritisch", "dringend", "wichtig",
            "pruefen"). The annotation was previously ``Dict[str, float]``
            despite the ``None`` default; fixed to ``Optional``.

    Returns:
        ImportanceLevelEnum
    """
    if score is None:
        return ImportanceLevelEnum.INFO

    thresholds = thresholds or DEFAULT_THRESHOLDS

    # Check bands from most to least severe; the first threshold met wins.
    if score >= thresholds.get("kritisch", 0.90):
        return ImportanceLevelEnum.KRITISCH
    elif score >= thresholds.get("dringend", 0.75):
        return ImportanceLevelEnum.DRINGEND
    elif score >= thresholds.get("wichtig", 0.60):
        return ImportanceLevelEnum.WICHTIG
    elif score >= thresholds.get("pruefen", 0.40):
        return ImportanceLevelEnum.PRUEFEN
    else:
        return ImportanceLevelEnum.INFO
|
||||
|
||||
|
||||
def importance_to_label_de(importance: ImportanceLevelEnum) -> str:
    """Return the German display label for an importance level ("Info" fallback)."""
    return {
        ImportanceLevelEnum.KRITISCH: "Kritisch",
        ImportanceLevelEnum.DRINGEND: "Dringend",
        ImportanceLevelEnum.WICHTIG: "Wichtig",
        ImportanceLevelEnum.PRUEFEN: "Zu prüfen",
        ImportanceLevelEnum.INFO: "Info",
    }.get(importance, "Info")
|
||||
|
||||
|
||||
def importance_to_color(importance: ImportanceLevelEnum) -> str:
    """Return the Tailwind-compatible CSS color name for an importance level."""
    return {
        ImportanceLevelEnum.KRITISCH: "red",
        ImportanceLevelEnum.DRINGEND: "orange",
        ImportanceLevelEnum.WICHTIG: "amber",
        ImportanceLevelEnum.PRUEFEN: "blue",
        ImportanceLevelEnum.INFO: "slate",
    }.get(importance, "slate")
|
||||
|
||||
|
||||
# German deadline patterns, precompiled once at import time instead of
# being recompiled on every call.
_DEADLINE_PATTERNS = [
    re.compile(r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{4})", re.IGNORECASE),
    re.compile(r"Frist[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{4})", re.IGNORECASE),
    re.compile(r"(?:Anmelde|Bewerbungs)schluss[:\s]+(\d{1,2})\.(\d{1,2})\.?(?:(\d{4}))?", re.IGNORECASE),
    re.compile(r"endet\s+am\s+(\d{1,2})\.(\d{1,2})\.(\d{4})?", re.IGNORECASE),
]


def extract_deadline(text: str) -> Optional[datetime]:
    """
    Extract a deadline date from German text.

    Recognized numeric date patterns, e.g.:
    - "bis zum 15.03.2026"
    - "Frist: 01.04.2026"
    - "Anmeldeschluss: 30.11."

    When the year is omitted, the current year is assumed. Spelled-out
    dates ("1. April") are NOT recognized.

    Args:
        text: Text to scan.

    Returns:
        The first valid deadline found, or None.
    """
    for pattern in _DEADLINE_PATTERNS:
        match = pattern.search(text)
        if not match:
            continue
        day = int(match.group(1))
        month = int(match.group(2))
        # Missing year (optional in some patterns) defaults to this year.
        year = int(match.group(3)) if match.group(3) else datetime.now().year
        try:
            return datetime(year, month, day)
        except ValueError:
            # Impossible calendar date (e.g. 31.02.) -> try next pattern.
            continue

    return None
|
||||
|
||||
|
||||
def generate_why_relevant(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    matched_keywords: List[str] = None
) -> str:
    """
    Build a German "Why is this relevant?" explanation for an alert.

    Args:
        alert: The alert item.
        profile_priorities: Priorities from the user's profile.
        matched_keywords: Keywords that matched the alert text.

    Returns:
        German explanation: up to two reasons joined by " • ".
    """
    reasons = []
    combined_text = f"{alert.title} {alert.snippet}"

    # 1) Deadline-based relevance: the closer the deadline, the stronger.
    deadline = extract_deadline(combined_text)
    if deadline:
        days_until = (deadline - datetime.now()).days
        if days_until <= 0:
            reasons.append("Frist abgelaufen oder heute!")
        elif days_until <= 7:
            reasons.append(f"Frist endet in {days_until} Tagen")
        elif days_until <= 30:
            reasons.append(f"Frist in ca. {days_until} Tagen")

    # 2) Keyword-based relevance (show at most three matched terms).
    if matched_keywords and len(matched_keywords) > 0:
        keywords_str = ", ".join(matched_keywords[:3])
        reasons.append(f"Enthält relevante Begriffe: {keywords_str}")

    # 3) Profile-priority relevance (top two priorities only).
    if profile_priorities:
        lowered = combined_text.lower()
        for priority in profile_priorities[:2]:
            label = priority.get("label", "")
            if any(kw.lower() in lowered for kw in priority.get("keywords", [])):
                reasons.append(f"Passt zu Ihrem Interesse: {label}")

    # 4) Score-based relevance.
    if alert.relevance_score and alert.relevance_score >= 0.8:
        reasons.append("Hohe Übereinstimmung mit Ihrem Profil")

    # Fallback so the explanation is never empty.
    if not reasons:
        reasons.append("Passt zu Ihren ausgewählten Themen")

    # At most two bullet points, joined for compact display.
    return " • ".join(reasons[:2])
|
||||
|
||||
|
||||
def generate_next_steps(
    alert: AlertItemDB,
    template_slug: str = None
) -> List[str]:
    """Suggest up to three recommended follow-up actions for an alert.

    Suggestions are derived from the active template slug and from simple
    keyword checks against the alert's title and snippet.

    Args:
        alert: The alert to generate steps for.
        template_slug: Slug of the active alert template, if any.

    Returns:
        List of at most three German action strings.
    """
    haystack = f"{alert.title} {alert.snippet}".lower()
    suggestions: List[str] = []

    def mentions(*terms: str) -> bool:
        # True when any of the given terms occurs in title/snippet.
        return any(term in haystack for term in terms)

    # Template-specific suggestions.
    if template_slug == "foerderprogramme":
        if mentions("antrag", "förder"):
            suggestions.append("Schulträger über Fördermöglichkeit informieren")
            suggestions.append("Antragsunterlagen prüfen")
        if mentions("frist", "deadline"):
            suggestions.append("Termin in Kalender eintragen")

    elif template_slug == "datenschutz-recht":
        if mentions("dsgvo", "datenschutz"):
            suggestions.append("Datenschutzbeauftragten informieren")
            suggestions.append("Prüfen, ob Handlungsbedarf besteht")
        if mentions("urteil", "gericht"):
            suggestions.append("Rechtsfolgen für die Schule prüfen")

    elif template_slug == "it-security":
        if mentions("cve", "sicherheitslücke"):
            suggestions.append("Betroffene Systeme prüfen")
            suggestions.append("Update/Patch einspielen")
        if mentions("phishing"):
            suggestions.append("Kollegium warnen")
            suggestions.append("Erkennungsmerkmale kommunizieren")

    elif template_slug == "abitur-updates":
        if mentions("abitur", "prüfung"):
            suggestions.append("Fachschaften informieren")
            suggestions.append("Anpassung der Kursplanung prüfen")

    elif template_slug == "fortbildungen":
        suggestions.append("Termin und Ort prüfen")
        suggestions.append("Bei Interesse: Anmeldung vornehmen")

    elif template_slug == "wettbewerbe-projekte":
        suggestions.append("Passende Schülergruppe identifizieren")
        suggestions.append("Anmeldefrist beachten")

    # Generic fallback when no template-specific step applied.
    if not suggestions:
        suggestions.append("Quelle öffnen und Details lesen")
        if mentions("frist", "bis"):
            suggestions.append("Termin notieren")

    return suggestions[:3]  # cap at three steps
|
||||
|
||||
|
||||
def enrich_alert_for_guided_mode(
    alert: AlertItemDB,
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> AlertItemDB:
    """Enrich an alert in place with guided-mode fields.

    Sets importance_level, why_relevant, next_steps and — when a deadline
    is found in the alert text — action_deadline.

    Args:
        alert: The alert to enrich (mutated in place).
        profile_priorities: Priority entries from the user profile.
        template_slug: Slug of the active template.
        importance_thresholds: Optional score thresholds.

    Returns:
        The same, enriched alert.
    """
    # Map the relevance score onto an importance level.
    alert.importance_level = score_to_importance(
        alert.relevance_score,
        importance_thresholds,
    )

    # Human-readable explanation and recommended actions.
    alert.why_relevant = generate_why_relevant(alert, profile_priorities)
    alert.next_steps = generate_next_steps(alert, template_slug)

    # Deadline extracted from title + snippet, if any.
    due_date = extract_deadline(f"{alert.title} {alert.snippet}")
    if due_date:
        alert.action_deadline = due_date

    return alert
|
||||
|
||||
|
||||
def batch_enrich_alerts(
    alerts: List[AlertItemDB],
    profile_priorities: List[Dict[str, Any]] = None,
    template_slug: str = None,
    importance_thresholds: Dict[str, float] = None
) -> List[AlertItemDB]:
    """Enrich several alerts for guided mode.

    See enrich_alert_for_guided_mode for the per-alert behaviour.
    """
    enriched: List[AlertItemDB] = []
    for item in alerts:
        enriched.append(
            enrich_alert_for_guided_mode(
                item,
                profile_priorities,
                template_slug,
                importance_thresholds,
            )
        )
    return enriched
|
||||
|
||||
|
||||
def filter_by_importance(
    alerts: List[AlertItemDB],
    min_level: ImportanceLevelEnum = ImportanceLevelEnum.INFO,
    max_count: int = 10
) -> List[AlertItemDB]:
    """Filter alerts by a minimum importance level and cap the result size.

    The result is sorted by importance, highest first (stable sort, so the
    incoming order is preserved within the same level).
    """
    # Numeric rank per level (higher = more important).
    rank_of = {
        ImportanceLevelEnum.KRITISCH: 5,
        ImportanceLevelEnum.DRINGEND: 4,
        ImportanceLevelEnum.WICHTIG: 3,
        ImportanceLevelEnum.PRUEFEN: 2,
        ImportanceLevelEnum.INFO: 1,
    }

    threshold = rank_of.get(min_level, 1)

    # Keep only alerts at or above the requested level (unknown level -> INFO).
    eligible = [
        item for item in alerts
        if rank_of.get(item.importance_level, 1) >= threshold
    ]

    # Most important first.
    ordered = sorted(
        eligible,
        key=lambda item: rank_of.get(item.importance_level, 1),
        reverse=True,
    )

    return ordered[:max_count]
|
||||
390
backend/alerts_agent/processing/relevance_scorer.py
Normal file
390
backend/alerts_agent/processing/relevance_scorer.py
Normal file
@@ -0,0 +1,390 @@
|
||||
"""
|
||||
Relevance Scorer für Alerts.
|
||||
|
||||
Nutzt das LLM Gateway um Alerts auf Relevanz zu prüfen.
|
||||
Berücksichtigt das Nutzerprofil für personalisierte Filterung.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from ..models.alert_item import AlertItem, AlertStatus
|
||||
from ..models.relevance_profile import RelevanceProfile
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RelevanceDecision(str, Enum):
    """Relevance decision for an alert (str-valued so it serialises cleanly)."""
    KEEP = "KEEP"      # relevant, show in inbox
    DROP = "DROP"      # irrelevant, archive automatically
    REVIEW = "REVIEW"  # uncertain, let the user decide
|
||||
|
||||
|
||||
@dataclass
class ScoringResult:
    """Result of scoring a single alert's relevance."""
    alert_id: str
    score: float  # 0.0 - 1.0
    decision: RelevanceDecision
    reason_codes: list = field(default_factory=list)  # machine-readable codes
    summary: Optional[str] = None  # short German summary from the LLM
    entities: dict = field(default_factory=dict)  # Extracted entities (topics, orgs, ...)
    model_version: str = ""
    prompt_version: str = "1.0"
    # NOTE(review): datetime.utcnow produces a naive timestamp and is deprecated
    # since Python 3.12 — consider datetime.now(timezone.utc).
    scored_at: datetime = field(default_factory=datetime.utcnow)
    error: Optional[str] = None  # set when scoring or response parsing failed

    def to_dict(self) -> dict:
        """Serialise the result to a JSON-compatible dict."""
        return {
            "alert_id": self.alert_id,
            "score": self.score,
            "decision": self.decision.value,
            "reason_codes": self.reason_codes,
            "summary": self.summary,
            "entities": self.entities,
            "model_version": self.model_version,
            "prompt_version": self.prompt_version,
            "scored_at": self.scored_at.isoformat(),
            "error": self.error,
        }
|
||||
|
||||
|
||||
# System prompt for relevance scoring. The prompt text itself is German and
# part of the runtime contract with the model — do not translate or reformat.
# The model is instructed to reply with a bare JSON object; _parse_response
# still defensively extracts the outermost {...} span.
RELEVANCE_SYSTEM_PROMPT = """Du bist ein Relevanz-Filter für News-Alerts. Deine Aufgabe ist es, zu bewerten, ob ein Alert für den Nutzer relevant ist.

## Deine Aufgaben:
1. Analysiere den Alert-Titel und Snippet
2. Berücksichtige das Nutzerprofil (Prioritäten, Ausschlüsse, Beispiele)
3. Gib eine Relevanz-Bewertung ab

## Bewertungskriterien:
- **KEEP** (Score 0.7-1.0): Alert ist klar relevant für die Prioritäten des Nutzers
- **REVIEW** (Score 0.4-0.7): Möglicherweise relevant, Nutzer sollte entscheiden
- **DROP** (Score 0.0-0.4): Nicht relevant, kann ignoriert werden

## Ausschluss-Gründe (automatisch DROP):
- Stellenanzeigen, Werbung, Pressemitteilungen (außer hochrelevant)
- Duplicate/sehr ähnliche Meldung zu kürzlichem Alert
- Thema in Ausschlussliste des Nutzers

## Output-Format:
Du MUSST mit einem JSON-Objekt antworten (keine Markdown-Codeblöcke, nur das JSON):
{
    "score": 0.85,
    "decision": "KEEP",
    "reason_codes": ["matches_priority_inklusion", "recent_news"],
    "summary": "Kurze Zusammenfassung des Alerts (1-2 Sätze)",
    "entities": {
        "topics": ["Inklusion", "Bayern"],
        "organizations": ["Kultusministerium"],
        "date_context": "aktuell"
    }
}

Wichtig:
- score ist eine Zahl zwischen 0.0 und 1.0
- decision ist entweder "KEEP", "DROP" oder "REVIEW"
- reason_codes sind kurze, maschinenlesbare Codes
- summary ist auf Deutsch
"""
|
||||
|
||||
|
||||
class RelevanceScorer:
    """
    Scorer for alert relevance.

    Calls the LLM gateway's OpenAI-compatible chat-completions endpoint and
    parses the model's JSON verdict into a ScoringResult. All public methods
    are fail-safe: errors yield a REVIEW result instead of raising.
    """

    def __init__(
        self,
        gateway_url: str = "http://localhost:8000/llm",
        api_key: str = "",
        model: str = "breakpilot-teacher-8b",
        timeout: int = 30,
    ):
        """
        Initialise the RelevanceScorer.

        Args:
            gateway_url: URL of the LLM gateway (trailing slash is stripped)
            api_key: API key for the gateway (sent as X-API-Key header)
            model: Model used for scoring
            timeout: HTTP timeout in seconds
        """
        self.gateway_url = gateway_url.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.timeout = timeout
        # Lazily created shared async client; see _get_client().
        self._client: Optional[httpx.AsyncClient] = None

        # Decision thresholds, used as fallback when the LLM returns an
        # unknown decision string (see _parse_response).
        self.keep_threshold = 0.7
        self.drop_threshold = 0.4

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=self.timeout,
                headers={
                    "X-API-Key": self.api_key,
                    "Content-Type": "application/json",
                },
            )
        return self._client

    async def close(self) -> None:
        """Close the HTTP client and drop the reference."""
        if self._client:
            await self._client.aclose()
            self._client = None

    def _build_user_prompt(self, alert: AlertItem) -> str:
        """Render a single alert as the user message for the LLM."""
        parts = [
            f"## Alert zu bewerten\n",
            f"**Thema-Label:** {alert.topic_label}",
            f"**Titel:** {alert.title}",
        ]

        if alert.snippet:
            # Cap the snippet at 500 characters to bound prompt size.
            snippet = alert.snippet[:500]
            if len(alert.snippet) > 500:
                snippet += "..."
            parts.append(f"**Snippet:** {snippet}")

        if alert.url:
            parts.append(f"**URL:** {alert.url}")

        if alert.published_at:
            parts.append(f"**Veröffentlicht:** {alert.published_at.strftime('%Y-%m-%d')}")

        parts.append("\nBewerte diesen Alert und antworte NUR mit dem JSON-Objekt.")

        return "\n".join(parts)

    def _build_system_prompt(self, profile: Optional[RelevanceProfile] = None) -> str:
        """Build the system prompt, appending the user's profile context if given."""
        system = RELEVANCE_SYSTEM_PROMPT

        if profile:
            system += "\n\n" + profile.get_prompt_context()

        return system

    def _parse_response(self, text: str, alert_id: str) -> ScoringResult:
        """Parse the raw LLM response text into a ScoringResult.

        Never raises: malformed responses produce a REVIEW result with the
        error recorded, so the alert is surfaced to the user instead of lost.
        """
        try:
            # Extract the JSON object: models sometimes wrap the answer in
            # Markdown code fences, so grab the outermost {...} span.
            json_match = re.search(r"\{[\s\S]*\}", text)
            if not json_match:
                raise ValueError("Kein JSON in Response gefunden")

            data = json.loads(json_match.group())

            score = float(data.get("score", 0.5))
            score = max(0.0, min(1.0, score))  # clamp to [0, 1]

            decision_str = data.get("decision", "REVIEW").upper()
            try:
                decision = RelevanceDecision(decision_str)
            except ValueError:
                # Unknown decision string: derive the decision from the score.
                if score >= self.keep_threshold:
                    decision = RelevanceDecision.KEEP
                elif score <= self.drop_threshold:
                    decision = RelevanceDecision.DROP
                else:
                    decision = RelevanceDecision.REVIEW

            return ScoringResult(
                alert_id=alert_id,
                score=score,
                decision=decision,
                reason_codes=data.get("reason_codes", []),
                summary=data.get("summary"),
                entities=data.get("entities", {}),
                model_version=self.model,
            )

        except json.JSONDecodeError as e:
            # Matched a {...} span but it was not valid JSON.
            logger.warning(f"JSON Parse Error für Alert {alert_id}: {e}")
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["parse_error"],
                error=f"JSON parse error: {str(e)}",
                model_version=self.model,
            )

        except Exception as e:
            # Catch-all (includes the ValueError raised above when no JSON
            # object was found in the response).
            logger.error(f"Unexpected error parsing response: {e}")
            return ScoringResult(
                alert_id=alert_id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_alert(
        self,
        alert: AlertItem,
        profile: Optional[RelevanceProfile] = None,
    ) -> ScoringResult:
        """
        Score a single alert via the LLM gateway.

        On success the scoring outcome is also written back onto the alert
        (relevance_* fields and status=SCORED). On any failure a fallback
        REVIEW result with score 0.5 is returned; this method never raises.

        Args:
            alert: The alert to score
            profile: Optional user profile for personalised scoring

        Returns:
            ScoringResult with the evaluation
        """
        try:
            client = await self._get_client()

            # Request body (OpenAI-compatible chat completion).
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": self._build_system_prompt(profile)},
                    {"role": "user", "content": self._build_user_prompt(alert)},
                ],
                "temperature": 0.3,  # low for more consistent verdicts
                "max_tokens": 500,
            }

            response = await client.post(
                f"{self.gateway_url}/v1/chat/completions",
                json=payload,
            )
            response.raise_for_status()

            data = response.json()
            content = data["choices"][0]["message"]["content"]

            result = self._parse_response(content, alert.id)

            # Persist the outcome on the alert object (side effect).
            alert.relevance_score = result.score
            alert.relevance_decision = result.decision.value
            alert.relevance_reasons = result.reason_codes
            alert.relevance_summary = result.summary
            alert.status = AlertStatus.SCORED

            return result

        except httpx.HTTPStatusError as e:
            error_msg = f"HTTP {e.response.status_code}: {e.response.text[:200]}"
            logger.error(f"Gateway Error für Alert {alert.id}: {error_msg}")
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["gateway_error"],
                error=error_msg,
                model_version=self.model,
            )

        except Exception as e:
            logger.exception(f"Scoring Error für Alert {alert.id}")
            return ScoringResult(
                alert_id=alert.id,
                score=0.5,
                decision=RelevanceDecision.REVIEW,
                reason_codes=["error"],
                error=str(e),
                model_version=self.model,
            )

    async def score_batch(
        self,
        alerts: list[AlertItem],
        profile: Optional[RelevanceProfile] = None,
        skip_scored: bool = True,
    ) -> list[ScoringResult]:
        """
        Score several alerts sequentially.

        Args:
            alerts: Alerts to score
            profile: User profile
            skip_scored: Skip alerts whose status is already SCORED

        Returns:
            List of ScoringResults (skipped alerts produce no entry)
        """
        results = []

        for alert in alerts:
            if skip_scored and alert.status == AlertStatus.SCORED:
                logger.debug(f"Alert {alert.id} bereits bewertet, überspringe")
                continue

            result = await self.score_alert(alert, profile)
            results.append(result)

            # NOTE: uncomment to throttle between requests (rate limits);
            # requires `import asyncio` at module level.
            # await asyncio.sleep(0.1)

        return results

    def get_stats(self, results: list[ScoringResult]) -> dict:
        """Aggregate scoring results into simple counters and rates."""
        total = len(results)
        if total == 0:
            return {"total": 0}

        keep = sum(1 for r in results if r.decision == RelevanceDecision.KEEP)
        drop = sum(1 for r in results if r.decision == RelevanceDecision.DROP)
        review = sum(1 for r in results if r.decision == RelevanceDecision.REVIEW)
        errors = sum(1 for r in results if r.error)

        avg_score = sum(r.score for r in results) / total

        return {
            "total": total,
            "keep": keep,
            "drop": drop,
            "review": review,
            "errors": errors,
            "keep_rate": keep / total,
            "drop_rate": drop / total,
            "avg_score": avg_score,
        }
|
||||
|
||||
|
||||
# Module-level singleton instance, created lazily on first use.
_scorer_instance: Optional[RelevanceScorer] = None


def get_relevance_scorer(
    gateway_url: str = "http://localhost:8000/llm",
    api_key: str = "",
    model: str = "breakpilot-teacher-8b",
) -> RelevanceScorer:
    """Return the singleton RelevanceScorer instance.

    NOTE: the arguments only take effect on the first call; subsequent
    calls return the already-created instance unchanged.
    """
    global _scorer_instance
    if _scorer_instance is None:
        _scorer_instance = RelevanceScorer(
            gateway_url=gateway_url,
            api_key=api_key,
            model=model,
        )
    return _scorer_instance
|
||||
512
backend/alerts_agent/processing/rule_engine.py
Normal file
512
backend/alerts_agent/processing/rule_engine.py
Normal file
@@ -0,0 +1,512 @@
|
||||
"""
|
||||
Rule Engine für Alerts Agent.
|
||||
|
||||
Evaluiert Regeln gegen Alert-Items und führt Aktionen aus.
|
||||
|
||||
Regel-Struktur:
|
||||
- Bedingungen: [{field, operator, value}, ...] (AND-verknüpft)
|
||||
- Aktion: keep, drop, tag, email, webhook, slack
|
||||
- Priorität: Höhere Priorität wird zuerst evaluiert
|
||||
"""
|
||||
import re
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any, Optional, Callable
|
||||
from enum import Enum
|
||||
|
||||
from alerts_agent.db.models import AlertItemDB, AlertRuleDB, RuleActionEnum
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConditionOperator(str, Enum):
    """Operators available in rule conditions (str-valued for JSON configs)."""
    CONTAINS = "contains"          # substring match, case-insensitive
    NOT_CONTAINS = "not_contains"
    EQUALS = "equals"
    NOT_EQUALS = "not_equals"
    STARTS_WITH = "starts_with"
    ENDS_WITH = "ends_with"
    REGEX = "regex"                # re.search, case-insensitive
    GREATER_THAN = "gt"            # numeric fields only
    LESS_THAN = "lt"
    GREATER_EQUAL = "gte"
    LESS_EQUAL = "lte"
    IN_LIST = "in"                 # any list entry occurs as a substring
    NOT_IN_LIST = "not_in"
||||
|
||||
|
||||
@dataclass
class RuleCondition:
    """A single rule condition of the form <field> <operator> <value>."""
    field: str  # "title", "snippet", "url", "source", "relevance_score"
    operator: ConditionOperator
    value: Any  # str, float, list

    @classmethod
    def from_dict(cls, data: Dict) -> "RuleCondition":
        """Build a condition from a dict; accepts "operator" or legacy "op"."""
        op_name = data.get("operator", data.get("op", "contains"))
        return cls(
            field=data.get("field", ""),
            operator=ConditionOperator(op_name),
            value=data.get("value", ""),
        )
|
||||
|
||||
|
||||
@dataclass
class RuleMatch:
    """Result of evaluating one rule against one alert."""
    rule_id: str
    rule_name: str
    matched: bool  # True when all conditions were satisfied
    action: RuleActionEnum
    action_config: Dict[str, Any]
    conditions_met: List[str]  # human-readable list of the conditions that matched
|
||||
|
||||
|
||||
def get_field_value(alert: AlertItemDB, field: str) -> Any:
    """
    Resolve a condition field name to the corresponding alert attribute.

    Enum-valued attributes (source, status, relevance_decision) are unwrapped
    to their string value, with "" when unset.

    Args:
        alert: Alert item
        field: Field name

    Returns:
        Field value, or None for unknown field names
    """
    source_val = alert.source.value if alert.source else ""
    status_val = alert.status.value if alert.status else ""
    decision_val = alert.relevance_decision.value if alert.relevance_decision else ""

    accessors = {
        "title": alert.title,
        "snippet": alert.snippet,
        "url": alert.url,
        "source": source_val,
        "status": status_val,
        "relevance_score": alert.relevance_score,
        "relevance_decision": decision_val,
        "lang": alert.lang,
        "topic_id": alert.topic_id,
    }

    return accessors.get(field, None)
|
||||
|
||||
|
||||
def evaluate_condition(
    alert: AlertItemDB,
    condition: RuleCondition,
) -> bool:
    """
    Evaluate a single condition against an alert.

    String comparisons are case-insensitive; numeric comparisons coerce the
    target to float. The function is fail-closed: missing fields, invalid
    regexes, or coercion errors all yield False.

    Args:
        alert: Alert item
        condition: Condition to evaluate

    Returns:
        True if the condition is satisfied
    """
    field_value = get_field_value(alert, condition.field)

    # Unknown field or unset attribute -> never matches.
    if field_value is None:
        return False

    op = condition.operator
    target = condition.value

    try:
        # String operations (case-insensitive).
        if isinstance(field_value, str):
            field_lower = field_value.lower()
            # Non-string targets (e.g. lists for IN_LIST) are passed through.
            target_lower = str(target).lower() if isinstance(target, str) else target

            if op == ConditionOperator.CONTAINS:
                return target_lower in field_lower

            elif op == ConditionOperator.NOT_CONTAINS:
                return target_lower not in field_lower

            elif op == ConditionOperator.EQUALS:
                return field_lower == target_lower

            elif op == ConditionOperator.NOT_EQUALS:
                return field_lower != target_lower

            elif op == ConditionOperator.STARTS_WITH:
                return field_lower.startswith(target_lower)

            elif op == ConditionOperator.ENDS_WITH:
                return field_lower.endswith(target_lower)

            elif op == ConditionOperator.REGEX:
                try:
                    return bool(re.search(str(target), field_value, re.IGNORECASE))
                except re.error:
                    # Bad user-supplied pattern: log and fail closed.
                    logger.warning(f"Invalid regex pattern: {target}")
                    return False

            elif op == ConditionOperator.IN_LIST:
                # OR semantics: True if ANY list entry is a substring.
                if isinstance(target, list):
                    return any(t.lower() in field_lower for t in target if isinstance(t, str))
                return False

            elif op == ConditionOperator.NOT_IN_LIST:
                if isinstance(target, list):
                    return not any(t.lower() in field_lower for t in target if isinstance(t, str))
                return True

        # Numeric operations (relevance_score etc.).
        elif isinstance(field_value, (int, float)):
            # NOTE(review): falsy targets (None, "", 0) all coerce to 0 here;
            # for target == 0 the result happens to be the same either way.
            target_num = float(target) if target else 0

            if op == ConditionOperator.EQUALS:
                return field_value == target_num

            elif op == ConditionOperator.NOT_EQUALS:
                return field_value != target_num

            elif op == ConditionOperator.GREATER_THAN:
                return field_value > target_num

            elif op == ConditionOperator.LESS_THAN:
                return field_value < target_num

            elif op == ConditionOperator.GREATER_EQUAL:
                return field_value >= target_num

            elif op == ConditionOperator.LESS_EQUAL:
                return field_value <= target_num

    except Exception as e:
        # Coercion or comparison failure: fail closed.
        logger.error(f"Error evaluating condition: {e}")
        return False

    # Operator not applicable to this field type (e.g. gt on a string).
    return False
|
||||
|
||||
|
||||
def evaluate_rule(
    alert: AlertItemDB,
    rule: AlertRuleDB,
) -> RuleMatch:
    """
    Evaluate one rule against an alert.

    All conditions must hold (AND combination); a rule without any
    conditions always matches.

    Args:
        alert: Alert item
        rule: Rule to evaluate

    Returns:
        RuleMatch describing the outcome
    """
    condition_dicts = rule.conditions or []
    satisfied: List[str] = []
    all_ok = True  # vacuously true for an empty condition list

    for raw in condition_dicts:
        cond = RuleCondition.from_dict(raw)
        if evaluate_condition(alert, cond):
            satisfied.append(f"{cond.field} {cond.operator.value} {cond.value}")
        else:
            all_ok = False

    return RuleMatch(
        rule_id=rule.id,
        rule_name=rule.name,
        matched=all_ok,
        action=rule.action_type,
        action_config=rule.action_config or {},
        conditions_met=satisfied,
    )
|
||||
|
||||
|
||||
def evaluate_rules_for_alert(
    alert: AlertItemDB,
    rules: List[AlertRuleDB],
) -> Optional[RuleMatch]:
    """
    Return the first matching rule for an alert, or None.

    Rules are checked in the given order (callers pass them sorted by
    priority, descending). Inactive rules and rules scoped to a different
    topic are skipped.

    Args:
        alert: Alert item
        rules: Rules to check (already sorted by priority)

    Returns:
        First RuleMatch, or None when nothing matched
    """
    for candidate in rules:
        if not candidate.is_active:
            continue

        # Topic scoping: a rule bound to a topic only applies to that topic.
        if candidate.topic_id and candidate.topic_id != alert.topic_id:
            continue

        outcome = evaluate_rule(alert, candidate)

        if outcome.matched:
            logger.debug(
                f"Rule '{candidate.name}' matched alert '{alert.id[:8]}': "
                f"{outcome.conditions_met}"
            )
            return outcome

    return None
|
||||
|
||||
|
||||
class RuleEngine:
    """
    Rule engine for batch processing of alerts.

    Loads the active rules once per instance (cached) and evaluates them
    against alerts. Call clear_cache() after rules change in the database.
    """

    def __init__(self, db_session):
        """
        Initialise the rule engine.

        Args:
            db_session: SQLAlchemy session
        """
        self.db = db_session
        # Lazily populated cache of active rules (None = not loaded yet).
        self._rules_cache: Optional[List[AlertRuleDB]] = None

    def _get_active_rules(self) -> List[AlertRuleDB]:
        """Load active rules from the database (cached after the first call)."""
        if self._rules_cache is None:
            # Local import, presumably to avoid a circular dependency with
            # the repository module — TODO confirm.
            from alerts_agent.db.repository import RuleRepository
            repo = RuleRepository(self.db)
            self._rules_cache = repo.get_active()

        return self._rules_cache

    def clear_cache(self) -> None:
        """Drop the rules cache so the next call reloads from the database."""
        self._rules_cache = None

    def process_alert(
        self,
        alert: AlertItemDB,
    ) -> Optional[RuleMatch]:
        """
        Evaluate all active rules against one alert.

        Args:
            alert: Alert item

        Returns:
            RuleMatch if some rule matched, else None
        """
        rules = self._get_active_rules()
        return evaluate_rules_for_alert(alert, rules)

    def process_alerts(
        self,
        alerts: List[AlertItemDB],
    ) -> Dict[str, RuleMatch]:
        """
        Evaluate all active rules against several alerts.

        Args:
            alerts: List of alert items

        Returns:
            Dict of alert_id -> RuleMatch (only for matched alerts)
        """
        rules = self._get_active_rules()
        results = {}

        for alert in alerts:
            match = evaluate_rules_for_alert(alert, rules)
            if match:
                results[alert.id] = match

        return results

    def apply_rule_actions(
        self,
        alert: AlertItemDB,
        match: RuleMatch,
    ) -> Dict[str, Any]:
        """
        Apply a matched rule's action to an alert.

        KEEP/DROP/TAG are applied immediately via the repositories;
        EMAIL/WEBHOOK/SLACK are only marked "deferred" here and executed
        later by the actions module.

        Args:
            alert: Alert item
            match: RuleMatch carrying the action information

        Returns:
            Dict describing the action outcome ("success", optional "error",
            action-specific keys)
        """
        from alerts_agent.db.repository import AlertItemRepository, RuleRepository

        alert_repo = AlertItemRepository(self.db)
        rule_repo = RuleRepository(self.db)

        action = match.action
        config = match.action_config

        result = {"action": action.value, "success": False}

        try:
            if action == RuleActionEnum.KEEP:
                # Force-keep: max score, decision KEEP.
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=1.0,
                    decision="KEEP",
                    reasons=["rule_match"],
                    summary=f"Matched rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True

            elif action == RuleActionEnum.DROP:
                # Force-drop: zero score, decision DROP.
                alert_repo.update_scoring(
                    alert_id=alert.id,
                    score=0.0,
                    decision="DROP",
                    reasons=["rule_match"],
                    summary=f"Dropped by rule: {match.rule_name}",
                    model="rule_engine",
                )
                result["success"] = True

            elif action == RuleActionEnum.TAG:
                # Merge the configured tags into the alert's user tags.
                # NOTE: list(set(...)) does not preserve tag order.
                tags = config.get("tags", [])
                if tags:
                    existing_tags = alert.user_tags or []
                    new_tags = list(set(existing_tags + tags))
                    alert_repo.update(alert.id, user_tags=new_tags)
                    result["tags_added"] = tags
                    result["success"] = True

            elif action == RuleActionEnum.EMAIL:
                # E-mail notification — handled later by the actions module.
                result["email_config"] = config
                result["success"] = True
                result["deferred"] = True  # sent later

            elif action == RuleActionEnum.WEBHOOK:
                # Webhook call — handled later by the actions module.
                result["webhook_config"] = config
                result["success"] = True
                result["deferred"] = True

            elif action == RuleActionEnum.SLACK:
                # Slack message — handled later by the actions module.
                result["slack_config"] = config
                result["success"] = True
                result["deferred"] = True

            # Record the match on the rule. NOTE(review): this also runs when
            # no branch above applied (unknown action) — confirm intended.
            rule_repo.increment_match_count(match.rule_id)

        except Exception as e:
            logger.error(f"Error applying rule action: {e}")
            result["error"] = str(e)

        return result
|
||||
|
||||
|
||||
# Convenience builders for common rule definitions.
def create_keyword_rule(
    name: str,
    keywords: List[str],
    action: str = "keep",
    field: str = "title",
) -> Dict:
    """
    Build a keyword-based rule definition.

    The keywords are OR-combined via the IN_LIST operator: the rule matches
    when any keyword occurs in the chosen field.

    Args:
        name: Rule name
        keywords: Keywords to look for
        action: Action to take (keep, drop, tag)
        field: Field to check (title, snippet, url)

    Returns:
        Rule definition as a dict
    """
    condition = {
        "field": field,
        "operator": "in",
        "value": keywords,
    }
    return {
        "name": name,
        "conditions": [condition],
        "action_type": action,
        "action_config": {},
    }
|
||||
|
||||
|
||||
def create_exclusion_rule(
    name: str,
    excluded_terms: List[str],
    field: str = "title",
) -> Dict:
    """
    Build a drop rule that excludes alerts mentioning any given term.

    Args:
        name: Rule name
        excluded_terms: Terms whose presence should drop the alert
        field: Field to check

    Returns:
        Rule definition as a dict
    """
    condition = {
        "field": field,
        "operator": "in",
        "value": excluded_terms,
    }
    return {
        "name": name,
        "conditions": [condition],
        "action_type": "drop",
        "action_config": {},
    }
|
||||
|
||||
|
||||
def create_score_threshold_rule(
    name: str,
    min_score: float,
    action: str = "keep",
) -> Dict:
    """
    Build a relevance-score-based rule definition.

    The rule matches when relevance_score >= min_score.

    Args:
        name: Rule name
        min_score: Minimum relevance score
        action: Action to take when the threshold is reached

    Returns:
        Rule definition as a dict
    """
    condition = {
        "field": "relevance_score",
        "operator": "gte",
        "value": min_score,
    }
    return {
        "name": name,
        "conditions": [condition],
        "action_type": action,
        "action_config": {},
    }
|
||||
Reference in New Issue
Block a user