Files
breakpilot-lehrer/klausur-service/backend/mail/ai_service.py
Benjamin Boenisch 5a31f52310 Initial commit: breakpilot-lehrer - Lehrer KI Platform
Services: Admin-Lehrer, Backend-Lehrer, Studio v2, Website,
Klausur-Service, School-Service, Voice-Service, Geo-Service,
BreakPilot Drive, Agent-Core

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 23:47:26 +01:00

748 lines
25 KiB
Python

"""
AI Email Analysis Service
AI-powered email analysis with:
- Sender classification (authority recognition)
- Deadline extraction
- Category classification
- Response suggestions
"""
import os
import re
import logging
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime, timedelta
import httpx
from .models import (
EmailCategory,
SenderType,
TaskPriority,
SenderClassification,
DeadlineExtraction,
EmailAnalysisResult,
ResponseSuggestion,
KNOWN_AUTHORITIES_NI,
classify_sender_by_domain,
get_priority_from_sender_type,
)
from .mail_db import update_email_ai_analysis
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# LLM Gateway configuration
# Base URL of the LLM gateway. An unset or empty environment value falls back
# to the local default, and a trailing slash is removed so that appending
# "/api/..." never yields a double slash in the request URL.
LLM_GATEWAY_URL = (os.getenv("LLM_GATEWAY_URL") or "http://localhost:8090").rstrip("/")
class AIEmailService:
    """
    AI-powered email analysis service.

    Features:
    - Domain-based sender classification (fast, no LLM)
    - LLM-based sender classification (fallback)
    - Deadline extraction using regex + LLM
    - Category classification
    - Response suggestions
    """

    # Explicit German dates such as "bis zum 15.01.2025",
    # "spätestens am 15.01.2025" or "Abgabetermin: 15.01.2025".
    _DATE_PATTERNS = (
        r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
        r"spätestens\s+(?:am\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
        r"(?:Abgabe|Termin|Frist)[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
    )
    # "innerhalb von 14 Tagen" / "innerhalb von 2 Wochen"; the unit is captured
    # so that weeks can be told apart from days.
    _RELATIVE_PATTERN = r"innerhalb\s+von\s+(\d+)\s+(Tagen|Wochen)"
    # "bis Ende Januar" / "bis Januar"
    _MONTH_PATTERN = (
        r"bis\s+(?:Ende\s+)?(Januar|Februar|März|April|Mai|Juni|Juli|August"
        r"|September|Oktober|November|Dezember)"
    )
    # Lower-cased German month name -> month number.
    _MONTH_NUMBERS = {
        "januar": 1,
        "februar": 2,
        "märz": 3,
        "april": 4,
        "mai": 5,
        "juni": 6,
        "juli": 7,
        "august": 8,
        "september": 9,
        "oktober": 10,
        "november": 11,
        "dezember": 12,
    }

    def __init__(self):
        # The HTTP client is created lazily by get_http_client().
        self._http_client = None

    async def get_http_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client for LLM gateway."""
        if self._http_client is None:
            self._http_client = httpx.AsyncClient(timeout=30.0)
        return self._http_client

    async def close(self) -> None:
        """Close the cached HTTP client, if one was created.

        A later call to get_http_client() creates a fresh client.
        """
        if self._http_client is not None:
            await self._http_client.aclose()
            self._http_client = None

    # =========================================================================
    # Shared helpers
    # =========================================================================

    async def _query_llm(
        self,
        prompt: str,
        max_tokens: int,
        default: str = "",
    ) -> Optional[str]:
        """Send a prompt to the LLM gateway and return its "response" field.

        Args:
            prompt: Prompt text to send
            max_tokens: Token limit passed to the gateway
            default: Value used when the reply has no "response" key

        Returns:
            The response text, or None when the gateway did not answer with
            HTTP 200. Transport and decoding errors propagate to the caller.
        """
        client = await self.get_http_client()
        response = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": max_tokens,
            },
        )
        if response.status_code != 200:
            logger.warning("LLM gateway returned HTTP %s", response.status_code)
            return None
        return response.json().get("response", default)

    @staticmethod
    def _clean_label(raw: str) -> str:
        """Normalize a label from an LLM reply (trim, unquote, lower-case)."""
        # The prompts show example labels in quotes, so replies may be quoted.
        return raw.strip().strip("\"'`").strip().lower()

    @staticmethod
    def _parse_confidence(raw: str) -> float:
        """Parse a confidence value and clamp it to the range 0.0-1.0.

        Raises:
            ValueError: If the text is not a number or is NaN.
        """
        value = float(raw.strip())
        if value != value:  # NaN is the only float that differs from itself
            raise ValueError("confidence is NaN")
        return min(max(value, 0.0), 1.0)

    @staticmethod
    def _keyword_hit(keyword: str, text: str) -> bool:
        """Return True when the keyword occurs in the (lower-cased) text.

        Long keywords match as substrings so that German compounds are found.
        Keywords of up to three characters (e.g. "it") must stand alone,
        otherwise they would match inside words such as "mit" or "bitte".
        """
        if len(keyword) <= 3:
            return re.search(rf"\b{re.escape(keyword)}\b", text) is not None
        return keyword in text

    # =========================================================================
    # Sender Classification
    # =========================================================================

    async def classify_sender(
        self,
        sender_email: str,
        sender_name: Optional[str] = None,
        subject: Optional[str] = None,
        body_preview: Optional[str] = None,
    ) -> SenderClassification:
        """
        Classify the sender of an email.

        First tries domain matching, then falls back to LLM.

        Args:
            sender_email: Sender's email address
            sender_name: Sender's display name
            subject: Email subject
            body_preview: First 200 chars of body

        Returns:
            SenderClassification with type and confidence
        """
        # Try domain-based classification first (fast, high confidence)
        domain_result = classify_sender_by_domain(sender_email)
        if domain_result:
            return domain_result
        # Fall back to LLM classification
        return await self._classify_sender_llm(
            sender_email, sender_name, subject, body_preview
        )

    async def _classify_sender_llm(
        self,
        sender_email: str,
        sender_name: Optional[str],
        subject: Optional[str],
        body_preview: Optional[str],
    ) -> SenderClassification:
        """Classify sender using LLM.

        Any failure (transport error, non-200 reply, unparsable answer) yields
        an UNBEKANNT classification with confidence 0.3 and ai_classified=False.
        """
        try:
            prompt = f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:
Absender E-Mail: {sender_email}
Absender Name: {sender_name or "Nicht angegeben"}
Betreff: {subject or "Nicht angegeben"}
Vorschau: {body_preview[:200] if body_preview else "Nicht verfügbar"}
Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehörde
- rlsb: Regionales Landesamt für Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersächsischer Bildungsserver
- schultraeger: Schulträger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen
Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begründung
"""
            result_text = await self._query_llm(
                prompt, 100, default="unbekannt|0.5|"
            )
            if result_text is not None:
                # Expected reply format: kategorie|konfidenz|kurze_begründung
                parts = result_text.strip().split("|")
                if len(parts) >= 2:
                    sender_type_str = self._clean_label(parts[0])
                    confidence = self._parse_confidence(parts[1])
                    # Map to enum
                    type_mapping = {
                        "kultusministerium": SenderType.KULTUSMINISTERIUM,
                        "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
                        "rlsb": SenderType.RLSB,
                        "schulamt": SenderType.SCHULAMT,
                        "nibis": SenderType.NIBIS,
                        "schultraeger": SenderType.SCHULTRAEGER,
                        "elternvertreter": SenderType.ELTERNVERTRETER,
                        "gewerkschaft": SenderType.GEWERKSCHAFT,
                        "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
                        "privatperson": SenderType.PRIVATPERSON,
                        "unternehmen": SenderType.UNTERNEHMEN,
                    }
                    return SenderClassification(
                        sender_type=type_mapping.get(
                            sender_type_str, SenderType.UNBEKANNT
                        ),
                        confidence=confidence,
                        domain_matched=False,
                        ai_classified=True,
                    )
        except Exception as e:
            logger.warning(f"LLM sender classification failed: {e}")
        # Default fallback
        return SenderClassification(
            sender_type=SenderType.UNBEKANNT,
            confidence=0.3,
            domain_matched=False,
            ai_classified=False,
        )

    # =========================================================================
    # Deadline Extraction
    # =========================================================================

    async def extract_deadlines(
        self,
        subject: str,
        body_text: str,
    ) -> List[DeadlineExtraction]:
        """
        Extract deadlines from email content.

        Uses regex patterns first, then LLM for complex cases.

        Args:
            subject: Email subject
            body_text: Email body text

        Returns:
            List of extracted deadlines
        """
        deadlines = []
        # Combine subject and body
        full_text = f"{subject}\n{body_text}" if body_text else subject
        # Try regex extraction first
        deadlines.extend(self._extract_deadlines_regex(full_text))
        # If no regex matches, try LLM on the first 1000 characters of the body
        if not deadlines and body_text:
            llm_deadlines = await self._extract_deadlines_llm(
                subject, body_text[:1000]
            )
            deadlines.extend(llm_deadlines)
        return deadlines

    def _extract_deadlines_regex(self, text: str) -> List[DeadlineExtraction]:
        """Extract deadlines using regex patterns.

        Recognizes explicit dates (dd.mm.yy / dd.mm.yyyy), relative periods
        ("innerhalb von N Tagen/Wochen") and month names ("bis Ende Januar").
        Explicit dates before today are skipped; a date equal to today is kept.

        Args:
            text: Text to scan (subject and body combined)

        Returns:
            List of extracted deadlines, in pattern order
        """
        deadlines = []
        if not text:
            return deadlines
        now = datetime.now()
        today = now.date()

        # 1. Explicit dates
        for pattern in self._DATE_PATTERNS:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                try:
                    day = int(match.group(1))
                    month = int(match.group(2))
                    year = int(match.group(3))
                    # Handle 2-digit years
                    if year < 100:
                        year += 2000
                    deadline_date = datetime(year, month, day)
                    # Skip past dates. Compare calendar dates: the parsed value
                    # is midnight, so a deadline due today is still valid.
                    if deadline_date.date() < today:
                        continue
                    # Get surrounding context
                    start = max(0, match.start() - 50)
                    end = min(len(text), match.end() + 50)
                    deadlines.append(DeadlineExtraction(
                        deadline_date=deadline_date,
                        description=f"Frist: {match.group(0)}",
                        confidence=0.85,
                        source_text=text[start:end].strip(),
                        is_firm=True,
                    ))
                except (ValueError, IndexError) as e:
                    logger.debug(f"Failed to parse date: {e}")
                    continue

        # 2. Relative periods (innerhalb von X Tagen/Wochen)
        for match in re.finditer(self._RELATIVE_PATTERN, text, re.IGNORECASE):
            try:
                days = int(match.group(1))
                if match.group(2).lower() == "wochen":
                    days *= 7
                deadline_date = now + timedelta(days=days)
                deadlines.append(DeadlineExtraction(
                    deadline_date=deadline_date,
                    description=f"Relative Frist: {match.group(0)}",
                    confidence=0.7,
                    source_text=match.group(0),
                    is_firm=False,
                ))
            except (ValueError, IndexError, OverflowError) as e:
                # OverflowError: day count too large for timedelta/datetime
                logger.debug(f"Failed to parse date: {e}")
                continue

        # 3. Month names: the deadline is the last day of the next occurrence
        #    of that month (this year, or next year if the month has passed).
        for match in re.finditer(self._MONTH_PATTERN, text, re.IGNORECASE):
            try:
                month = self._MONTH_NUMBERS[match.group(1).lower()]
                year = today.year if month >= today.month else today.year + 1
                # Last day of month = first day of the following month - 1 day
                if month == 12:
                    first_of_next = datetime(year + 1, 1, 1)
                else:
                    first_of_next = datetime(year, month + 1, 1)
                deadline_date = first_of_next - timedelta(days=1)
                start = max(0, match.start() - 50)
                end = min(len(text), match.end() + 50)
                deadlines.append(DeadlineExtraction(
                    deadline_date=deadline_date,
                    description=f"Frist: {match.group(0)}",
                    confidence=0.6,
                    source_text=text[start:end].strip(),
                    is_firm=False,
                ))
            except (ValueError, IndexError, KeyError, OverflowError) as e:
                logger.debug(f"Failed to parse date: {e}")
                continue
        return deadlines

    async def _extract_deadlines_llm(
        self,
        subject: str,
        body_preview: str,
    ) -> List[DeadlineExtraction]:
        """Extract deadlines using LLM.

        Each reply line is expected as DATUM|BESCHREIBUNG|VERBINDLICH with an
        ISO date; lines that do not parse are skipped. Returns an empty list on
        any failure or when the model answers KEINE_FRISTEN.
        """
        try:
            prompt = f"""Analysiere diese E-Mail und extrahiere alle genannten Fristen und Termine:
Betreff: {subject}
Inhalt: {body_preview}
Liste alle Fristen im folgenden Format auf (eine pro Zeile):
DATUM|BESCHREIBUNG|VERBINDLICH
Beispiel: 2025-01-15|Abgabe der Berichte|ja
Wenn keine Fristen gefunden werden, antworte mit: KEINE_FRISTEN
Antworte NUR im angegebenen Format.
"""
            result_text = await self._query_llm(prompt, 200)
            if result_text is None or "KEINE_FRISTEN" in result_text:
                return []
            deadlines = []
            for line in result_text.strip().split("\n"):
                parts = line.split("|")
                if len(parts) < 2:
                    continue
                try:
                    deadline_date = datetime.fromisoformat(parts[0].strip())
                    # The rest of the pipeline compares against the naive
                    # datetime.now(); mixing aware and naive values raises
                    # TypeError, so convert aware values to naive local time.
                    if deadline_date.tzinfo is not None:
                        deadline_date = deadline_date.astimezone().replace(
                            tzinfo=None
                        )
                    is_firm = (
                        parts[2].strip().lower() == "ja"
                        if len(parts) > 2
                        else True
                    )
                    deadlines.append(DeadlineExtraction(
                        deadline_date=deadline_date,
                        description=parts[1].strip(),
                        confidence=0.7,
                        source_text=line,
                        is_firm=is_firm,
                    ))
                except (ValueError, IndexError, OverflowError, OSError):
                    continue
            return deadlines
        except Exception as e:
            logger.warning(f"LLM deadline extraction failed: {e}")
        return []

    # =========================================================================
    # Email Category Classification
    # =========================================================================

    async def classify_category(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Tuple[EmailCategory, float]:
        """
        Classify email into a category.

        Args:
            subject: Email subject
            body_preview: First 200 chars of body
            sender_type: Already classified sender type

        Returns:
            Tuple of (category, confidence)
        """
        # Rule-based classification first
        category, confidence = self._classify_category_rules(
            subject, body_preview, sender_type
        )
        if confidence > 0.7:
            return category, confidence
        # Fall back to LLM
        return await self._classify_category_llm(subject, body_preview)

    def _classify_category_rules(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Tuple[EmailCategory, float]:
        """Rule-based category classification.

        Counts keyword hits per category in subject + preview; the category
        with the most hits wins (the first one listed wins ties). The hit count
        is mapped to a confidence of 0.4 + 0.15 per hit, capped at 0.9.
        """
        text = f"{subject} {body_preview}".lower()
        # Keywords for each category
        category_keywords = {
            EmailCategory.DIENSTLICH: [
                "dienstlich", "dienstanweisung", "erlass", "verordnung",
                "bescheid", "verfügung", "ministerium", "behörde",
            ],
            EmailCategory.PERSONAL: [
                "personalrat", "stellenausschreibung", "versetzung",
                "beurteilung", "dienstzeugnis", "krankmeldung", "elternzeit",
            ],
            EmailCategory.FINANZEN: [
                "budget", "haushalt", "etat", "abrechnung", "rechnung",
                "erstattung", "zuschuss", "fördermittel",
            ],
            EmailCategory.ELTERN: [
                "elternbrief", "elternabend", "schulkonferenz",
                "elternvertreter", "elternbeirat",
            ],
            EmailCategory.SCHUELER: [
                "schüler", "schülerin", "zeugnis", "klasse", "unterricht",
                "prüfung", "klassenfahrt", "schulpflicht",
            ],
            EmailCategory.FORTBILDUNG: [
                "fortbildung", "seminar", "workshop", "schulung",
                "weiterbildung", "nlq", "didaktik",
            ],
            EmailCategory.VERANSTALTUNG: [
                "einladung", "veranstaltung", "termin", "konferenz",
                "sitzung", "tagung", "feier",
            ],
            EmailCategory.SICHERHEIT: [
                "sicherheit", "notfall", "brandschutz", "evakuierung",
                "hygiene", "corona", "infektionsschutz",
            ],
            EmailCategory.TECHNIK: [
                "it", "software", "computer", "netzwerk", "login",
                "passwort", "digitalisierung", "iserv",
            ],
            EmailCategory.NEWSLETTER: [
                "newsletter", "rundschreiben", "info-mail", "mitteilung",
            ],
            EmailCategory.WERBUNG: [
                "angebot", "rabatt", "aktion", "werbung", "abonnement",
            ],
        }
        best_category = EmailCategory.SONSTIGES
        best_score = 0
        for category, keywords in category_keywords.items():
            score = sum(1 for kw in keywords if self._keyword_hit(kw, text))
            if score > best_score:
                best_score = score
                best_category = category
        # Adjust based on sender type: mail from these authorities without any
        # keyword hit is treated as official business.
        if sender_type in [
            SenderType.KULTUSMINISTERIUM,
            SenderType.LANDESSCHULBEHOERDE,
            SenderType.RLSB,
        ]:
            if best_category == EmailCategory.SONSTIGES:
                best_category = EmailCategory.DIENSTLICH
                best_score = 2
        # Convert score to confidence
        confidence = min(0.9, 0.4 + (best_score * 0.15))
        return best_category, confidence

    async def _classify_category_llm(
        self,
        subject: str,
        body_preview: str,
    ) -> Tuple[EmailCategory, float]:
        """LLM-based category classification.

        Returns (SONSTIGES, 0.5) when the gateway fails or the reply cannot be
        mapped to an EmailCategory value.
        """
        try:
            categories = ", ".join([c.value for c in EmailCategory])
            prompt = f"""Klassifiziere diese E-Mail in EINE Kategorie:
Betreff: {subject}
Inhalt: {body_preview[:500]}
Kategorien: {categories}
Antworte NUR mit dem Kategorienamen und einer Konfidenz (0.0-1.0):
Format: kategorie|konfidenz
"""
            result = await self._query_llm(prompt, 50, default="sonstiges|0.5")
            if result is not None:
                parts = result.strip().split("|")
                if len(parts) >= 2:
                    category_str = self._clean_label(parts[0])
                    confidence = self._parse_confidence(parts[1])
                    try:
                        return EmailCategory(category_str), confidence
                    except ValueError:
                        # Unknown category name: use the default below
                        pass
        except Exception as e:
            logger.warning(f"LLM category classification failed: {e}")
        return EmailCategory.SONSTIGES, 0.5

    # =========================================================================
    # Full Analysis Pipeline
    # =========================================================================

    async def analyze_email(
        self,
        email_id: str,
        sender_email: str,
        sender_name: Optional[str],
        subject: str,
        body_text: Optional[str],
        body_preview: Optional[str],
    ) -> EmailAnalysisResult:
        """
        Run full analysis pipeline on an email.

        Args:
            email_id: Database ID of the email
            sender_email: Sender's email address
            sender_name: Sender's display name
            subject: Email subject
            body_text: Full body text
            body_preview: Preview text

        Returns:
            Complete analysis result
        """
        # 1. Classify sender
        sender_classification = await self.classify_sender(
            sender_email, sender_name, subject, body_preview
        )
        # 2. Extract deadlines
        deadlines = await self.extract_deadlines(subject, body_text or "")
        # 3. Classify category
        category, category_confidence = await self.classify_category(
            subject, body_preview or "", sender_classification.sender_type
        )
        # 4. Determine priority
        suggested_priority = get_priority_from_sender_type(
            sender_classification.sender_type
        )
        # Upgrade priority if deadlines are found
        if deadlines:
            nearest_deadline = min(d.deadline_date for d in deadlines)
            days_until = (nearest_deadline - datetime.now()).days
            if days_until <= 1:
                deadline_priority = TaskPriority.URGENT
            elif days_until <= 3:
                deadline_priority = TaskPriority.HIGH
            elif days_until <= 7:
                deadline_priority = TaskPriority.MEDIUM
            else:
                deadline_priority = None
            if deadline_priority is not None:
                # Explicit ranking so a deadline can only raise the priority,
                # never lower one already derived from the sender. Priorities
                # not listed here rank below MEDIUM.
                priority_rank = {
                    TaskPriority.MEDIUM: 1,
                    TaskPriority.HIGH: 2,
                    TaskPriority.URGENT: 3,
                }
                current_rank = priority_rank.get(suggested_priority, 0)
                if current_rank < priority_rank[deadline_priority]:
                    suggested_priority = deadline_priority
        # 5. Generate summary (optional, can be expensive)
        summary = None  # Could add LLM summary generation here
        # 6. Determine if task should be auto-created
        auto_create_task = (
            len(deadlines) > 0 or
            sender_classification.sender_type in [
                SenderType.KULTUSMINISTERIUM,
                SenderType.LANDESSCHULBEHOERDE,
                SenderType.RLSB,
            ]
        )
        # 7. Store analysis in database
        await update_email_ai_analysis(
            email_id=email_id,
            category=category.value,
            sender_type=sender_classification.sender_type.value,
            sender_authority_name=sender_classification.authority_name,
            detected_deadlines=[
                {
                    "date": d.deadline_date.isoformat(),
                    "description": d.description,
                    "is_firm": d.is_firm,
                }
                for d in deadlines
            ],
            suggested_priority=suggested_priority.value,
            ai_summary=summary,
        )
        return EmailAnalysisResult(
            email_id=email_id,
            category=category,
            category_confidence=category_confidence,
            sender_classification=sender_classification,
            deadlines=deadlines,
            suggested_priority=suggested_priority,
            summary=summary,
            suggested_actions=[],
            auto_create_task=auto_create_task,
        )

    # =========================================================================
    # Response Suggestions
    # =========================================================================

    async def suggest_response(
        self,
        subject: str,
        body_text: str,
        sender_type: SenderType,
        category: EmailCategory,
    ) -> List[ResponseSuggestion]:
        """
        Generate response suggestions for an email.

        Args:
            subject: Original email subject
            body_text: Original email body
            sender_type: Classified sender type
            category: Classified category

        Returns:
            List of response suggestions
        """
        suggestions = []
        # Add standard templates based on sender type and category
        if sender_type in [
            SenderType.KULTUSMINISTERIUM,
            SenderType.LANDESSCHULBEHOERDE,
            SenderType.RLSB,
        ]:
            suggestions.append(ResponseSuggestion(
                template_type="acknowledgment",
                subject=f"Re: {subject}",
                body="""Sehr geehrte Damen und Herren,
vielen Dank für Ihre Nachricht.
Ich bestätige den Eingang und werde die Angelegenheit fristgerecht bearbeiten.
Mit freundlichen Grüßen""",
                confidence=0.8,
            ))
        if category == EmailCategory.ELTERN:
            suggestions.append(ResponseSuggestion(
                template_type="parent_response",
                subject=f"Re: {subject}",
                body="""Liebe Eltern,
vielen Dank für Ihre Nachricht.
[Ihre Antwort hier]
Mit freundlichen Grüßen""",
                confidence=0.7,
            ))
        # Add LLM-generated suggestion (best effort; templates are kept even
        # when the LLM call fails)
        try:
            llm_suggestion = await self._generate_response_llm(
                subject, (body_text or "")[:500], sender_type
            )
            if llm_suggestion:
                suggestions.append(llm_suggestion)
        except Exception as e:
            logger.warning(f"LLM response generation failed: {e}")
        return suggestions

    async def _generate_response_llm(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Optional[ResponseSuggestion]:
        """Generate a response suggestion using LLM.

        Returns None when the gateway fails or returns an empty answer.
        """
        try:
            sender_desc = {
                SenderType.KULTUSMINISTERIUM: "dem Kultusministerium",
                SenderType.LANDESSCHULBEHOERDE: "der Landesschulbehörde",
                SenderType.RLSB: "dem RLSB",
                SenderType.ELTERNVERTRETER: "einem Elternvertreter",
            }.get(sender_type, "einem Absender")
            prompt = f"""Du bist eine Schulleiterin in Niedersachsen. Formuliere eine professionelle, kurze Antwort auf diese E-Mail von {sender_desc}:
Betreff: {subject}
Inhalt: {body_preview}
Die Antwort sollte:
- Höflich und formell sein
- Den Eingang bestätigen
- Eine konkrete nächste Aktion nennen oder um Klärung bitten
Antworte NUR mit dem Antworttext (ohne Betreffzeile, ohne "Betreff:").
"""
            reply = await self._query_llm(prompt, 300)
            if reply is not None:
                body = reply.strip()
                if body:
                    return ResponseSuggestion(
                        template_type="ai_generated",
                        subject=f"Re: {subject}",
                        body=body,
                        confidence=0.6,
                    )
        except Exception as e:
            logger.warning(f"LLM response generation failed: {e}")
        return None
# Module-level shared instance; stays None until first requested
_ai_service: Optional[AIEmailService] = None


def get_ai_email_service() -> AIEmailService:
    """Return the shared AIEmailService, instantiating it on the first call."""
    global _ai_service
    if _ai_service is not None:
        return _ai_service
    _ai_service = AIEmailService()
    return _ai_service