"""
AI Email Analysis Service

AI-powered email analysis with:
- Sender classification (authority recognition)
- Deadline extraction
- Category classification
- Response suggestions
"""

import os
import re
import logging
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime, timedelta

import httpx

from .models import (
    EmailCategory,
    SenderType,
    TaskPriority,
    SenderClassification,
    DeadlineExtraction,
    EmailAnalysisResult,
    ResponseSuggestion,
    KNOWN_AUTHORITIES_NI,
    classify_sender_by_domain,
    get_priority_from_sender_type,
)
from .mail_db import update_email_ai_analysis

logger = logging.getLogger(__name__)

# LLM Gateway configuration
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")

# German month names (lower-case) mapped to their month number.
_GERMAN_MONTHS = {
    "januar": 1,
    "februar": 2,
    "märz": 3,
    "april": 4,
    "mai": 5,
    "juni": 6,
    "juli": 7,
    "august": 8,
    "september": 9,
    "oktober": 10,
    "november": 11,
    "dezember": 12,
}

# Absolute German dates; each pattern captures (day, month, year).
_ABSOLUTE_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "bis zum 15.01.2025"
        r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
        # "spätestens am 15.01.2025"
        r"spätestens\s+(?:am\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
        # "Abgabetermin: 15.01.2025"
        r"(?:Abgabe|Termin|Frist)[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{2,4})",
    )
)

# "innerhalb von 14 Tagen" / "innerhalb von 2 Wochen"; captures (amount, unit).
_RELATIVE_PATTERN = re.compile(
    r"innerhalb\s+von\s+(\d+)\s+(Tagen|Wochen)", re.IGNORECASE
)

# "bis Ende Januar" / "bis Januar"; the trailing \b keeps "bis Maier" from
# being read as the month "Mai".
_MONTH_PATTERN = re.compile(
    r"bis\s+(?:Ende\s+)?(" + "|".join(_GERMAN_MONTHS) + r")\b",
    re.IGNORECASE,
)

# Canned reply bodies used by suggest_response().
_ACKNOWLEDGMENT_BODY = (
    "Sehr geehrte Damen und Herren,\n"
    "\n"
    "vielen Dank für Ihre Nachricht. Ich bestätige den Eingang und werde die "
    "Angelegenheit fristgerecht bearbeiten.\n"
    "\n"
    "Mit freundlichen Grüßen"
)

_PARENT_RESPONSE_BODY = (
    "Liebe Eltern,\n"
    "\n"
    "vielen Dank für Ihre Nachricht.\n"
    "\n"
    "[Ihre Antwort hier]\n"
    "\n"
    "Mit freundlichen Grüßen"
)


def _parse_confidence(raw: str) -> float:
    """
    Parse a confidence value from LLM output and clamp it to [0.0, 1.0].

    Accepts a decimal comma ("0,8") as well as a decimal point.

    Raises:
        ValueError: If the text is not numeric or is NaN.
    """
    value = float(raw.strip().replace(",", "."))
    if value != value:  # NaN is the only float that is not equal to itself
        raise ValueError("confidence is NaN")
    return min(max(value, 0.0), 1.0)


def _end_of_month(year: int, month: int) -> datetime:
    """Return midnight on the last day of the given month."""
    if month == 12:
        first_of_next = datetime(year + 1, 1, 1)
    else:
        first_of_next = datetime(year, month + 1, 1)
    return first_of_next - timedelta(days=1)


def _match_context(text: str, match: "re.Match", radius: int = 50) -> str:
    """Return the matched text with up to `radius` characters on each side."""
    start = max(0, match.start() - radius)
    end = min(len(text), match.end() + radius)
    return text[start:end].strip()


def _count_keyword_hits(text: str, keywords: List[str]) -> int:
    """
    Count how many keywords occur in the (already lower-cased) text.

    Keywords of up to three characters must match as whole words; otherwise
    "it" would match inside "mit", "bitte", "zeit", ... Longer keywords keep
    substring matching so that German compound words still hit.
    """
    hits = 0
    for keyword in keywords:
        if len(keyword) <= 3:
            if re.search(r"\b" + re.escape(keyword) + r"\b", text):
                hits += 1
        elif keyword in text:
            hits += 1
    return hits


class AIEmailService:
    """
    AI-powered email analysis service.

    Features:
    - Domain-based sender classification (fast, no LLM)
    - LLM-based sender classification (fallback)
    - Deadline extraction using regex + LLM
    - Category classification
    - Response suggestions
    """

    def __init__(self):
        # Created lazily by get_http_client(); released by close().
        self._http_client: Optional[httpx.AsyncClient] = None

    async def get_http_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client for LLM gateway."""
        if self._http_client is None:
            self._http_client = httpx.AsyncClient(timeout=30.0)
        return self._http_client

    async def close(self) -> None:
        """Close the HTTP client, if one was created. Safe to call repeatedly."""
        if self._http_client is not None:
            await self._http_client.aclose()
            self._http_client = None

    async def _llm_complete(
        self,
        prompt: str,
        max_tokens: int,
        default: str = "",
    ) -> Optional[str]:
        """
        Send a prompt to the LLM gateway and return the response text.

        Args:
            prompt: Prompt text
            max_tokens: Token limit passed to the gateway
            default: Text to use when the reply has no "response" key

        Returns:
            The response text, or None if the gateway did not answer with
            HTTP 200 or the "response" value is not a string.

        Network and JSON decoding errors propagate to the caller.
        """
        client = await self.get_http_client()
        response = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": max_tokens,
            },
        )
        if response.status_code != 200:
            logger.warning("LLM gateway returned HTTP %s", response.status_code)
            return None

        text = response.json().get("response", default)
        return text if isinstance(text, str) else None

    @staticmethod
    def _is_state_authority(sender_type: SenderType) -> bool:
        """Return True for the Kultusministerium, Landesschulbehörde and RLSB."""
        return sender_type in (
            SenderType.KULTUSMINISTERIUM,
            SenderType.LANDESSCHULBEHOERDE,
            SenderType.RLSB,
        )

    @staticmethod
    def _raise_priority(current: TaskPriority, floor: TaskPriority) -> TaskPriority:
        """
        Return `floor` if it outranks `current`, otherwise `current`.

        Only MEDIUM < HIGH < URGENT are ranked; any other member is treated
        as below MEDIUM.
        NOTE(review): assumes TaskPriority has no member above URGENT — confirm.
        """
        rank = {
            TaskPriority.MEDIUM: 1,
            TaskPriority.HIGH: 2,
            TaskPriority.URGENT: 3,
        }
        if rank.get(floor, 0) > rank.get(current, 0):
            return floor
        return current

    # =========================================================================
    # Sender Classification
    # =========================================================================

    async def classify_sender(
        self,
        sender_email: str,
        sender_name: Optional[str] = None,
        subject: Optional[str] = None,
        body_preview: Optional[str] = None,
    ) -> SenderClassification:
        """
        Classify the sender of an email.

        First tries domain matching, then falls back to LLM.

        Args:
            sender_email: Sender's email address
            sender_name: Sender's display name
            subject: Email subject
            body_preview: Preview of the body (only the first 200 chars are used)

        Returns:
            SenderClassification with type and confidence
        """
        # Try domain-based classification first (no LLM call needed)
        domain_result = classify_sender_by_domain(sender_email)
        if domain_result:
            return domain_result

        # Fall back to LLM classification
        return await self._classify_sender_llm(
            sender_email, sender_name, subject, body_preview
        )

    async def _classify_sender_llm(
        self,
        sender_email: str,
        sender_name: Optional[str],
        subject: Optional[str],
        body_preview: Optional[str],
    ) -> SenderClassification:
        """
        Classify sender using LLM.

        Expects an answer of the form "kategorie|konfidenz|begründung". Any
        failure (network error, non-200 status, unparsable answer) yields the
        UNBEKANNT fallback with confidence 0.3.
        """
        name_text = sender_name or "Nicht angegeben"
        subject_text = subject or "Nicht angegeben"
        preview_text = body_preview[:200] if body_preview else "Nicht verfügbar"

        prompt = "\n".join([
            "Analysiere den Absender dieser E-Mail und klassifiziere ihn:",
            "",
            f"Absender E-Mail: {sender_email}",
            f"Absender Name: {name_text}",
            f"Betreff: {subject_text}",
            f"Vorschau: {preview_text}",
            "",
            "Klassifiziere den Absender in EINE der folgenden Kategorien:",
            "- kultusministerium: Kultusministerium/Bildungsministerium",
            "- landesschulbehoerde: Landesschulbehörde",
            "- rlsb: Regionales Landesamt für Schule und Bildung",
            "- schulamt: Schulamt",
            "- nibis: Niedersächsischer Bildungsserver",
            "- schultraeger: Schulträger/Kommune",
            "- elternvertreter: Elternvertreter/Elternrat",
            "- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)",
            "- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)",
            "- privatperson: Privatperson",
            "- unternehmen: Unternehmen/Firma",
            "- unbekannt: Nicht einzuordnen",
            "",
            'Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") '
            "und einer Konfidenz von 0.0 bis 1.0.",
            "Format: kategorie|konfidenz|kurze_begründung",
            "",
        ])

        try:
            result_text = await self._llm_complete(
                prompt, max_tokens=100, default="unbekannt|0.5|"
            )
            if result_text is not None:
                parts = result_text.strip().split("|")
                if len(parts) >= 2:
                    # The prompt's example is quoted, so the model may quote too.
                    sender_type_str = parts[0].strip(" \t\"'").lower()
                    confidence = _parse_confidence(parts[1])

                    # Map to enum
                    type_mapping = {
                        "kultusministerium": SenderType.KULTUSMINISTERIUM,
                        "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
                        "rlsb": SenderType.RLSB,
                        "schulamt": SenderType.SCHULAMT,
                        "nibis": SenderType.NIBIS,
                        "schultraeger": SenderType.SCHULTRAEGER,
                        "elternvertreter": SenderType.ELTERNVERTRETER,
                        "gewerkschaft": SenderType.GEWERKSCHAFT,
                        "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
                        "privatperson": SenderType.PRIVATPERSON,
                        "unternehmen": SenderType.UNTERNEHMEN,
                    }
                    sender_type = type_mapping.get(
                        sender_type_str, SenderType.UNBEKANNT
                    )

                    return SenderClassification(
                        sender_type=sender_type,
                        confidence=confidence,
                        domain_matched=False,
                        ai_classified=True,
                    )
        except Exception as e:
            # Best effort: classification must never break the pipeline.
            logger.warning("LLM sender classification failed: %s", e)

        # Default fallback
        return SenderClassification(
            sender_type=SenderType.UNBEKANNT,
            confidence=0.3,
            domain_matched=False,
            ai_classified=False,
        )

    # =========================================================================
    # Deadline Extraction
    # =========================================================================

    async def extract_deadlines(
        self,
        subject: str,
        body_text: str,
    ) -> List[DeadlineExtraction]:
        """
        Extract deadlines from email content.

        Uses regex patterns first; the LLM is only consulted when the regex
        pass finds nothing and there is body text.

        Args:
            subject: Email subject
            body_text: Email body text

        Returns:
            List of extracted deadlines
        """
        # Combine subject and body
        full_text = f"{subject}\n{body_text}" if body_text else subject

        deadlines = list(self._extract_deadlines_regex(full_text))

        # If no regex matches, try LLM on the first 1000 chars of the body
        if not deadlines and body_text:
            deadlines.extend(
                await self._extract_deadlines_llm(subject, body_text[:1000])
            )

        return deadlines

    def _extract_deadlines_regex(
        self,
        text: str,
        now: Optional[datetime] = None,
    ) -> List[DeadlineExtraction]:
        """
        Extract deadlines using regex patterns.

        Recognises absolute dates ("bis zum 15.01.2025"), relative periods
        ("innerhalb von 14 Tagen" / "... 2 Wochen") and month deadlines
        ("bis Ende Januar").

        Args:
            text: Text to scan
            now: Reference time; defaults to datetime.now()

        Returns:
            Deadlines in pattern order: absolute, relative, month
        """
        now = now or datetime.now()
        today = now.date()
        deadlines: List[DeadlineExtraction] = []

        # Absolute dates
        for pattern in _ABSOLUTE_DATE_PATTERNS:
            for match in pattern.finditer(text):
                try:
                    day, month, year = (int(group) for group in match.groups())
                    # Handle 2-digit years
                    if year < 100:
                        year += 2000
                    deadline_date = datetime(year, month, day)
                except ValueError as e:
                    logger.debug("Failed to parse date: %s", e)
                    continue

                # Skip past dates. Compare calendar days: a deadline dated
                # today is still open, although its midnight timestamp is
                # already behind `now`.
                if deadline_date.date() < today:
                    continue

                deadlines.append(DeadlineExtraction(
                    deadline_date=deadline_date,
                    description=f"Frist: {match.group(0)}",
                    confidence=0.85,
                    source_text=_match_context(text, match),
                    is_firm=True,
                ))

        # Relative periods (innerhalb von X Tagen/Wochen)
        for match in _RELATIVE_PATTERN.finditer(text):
            try:
                days = int(match.group(1))
                if match.group(2).lower() == "wochen":
                    days *= 7
                deadline_date = now + timedelta(days=days)
            except (ValueError, OverflowError) as e:
                logger.debug("Failed to parse date: %s", e)
                continue

            deadlines.append(DeadlineExtraction(
                deadline_date=deadline_date,
                description=f"Relative Frist: {match.group(0)}",
                confidence=0.7,
                source_text=match.group(0),
                is_firm=False,
            ))

        # Month deadlines (bis Ende Januar): last day of the next occurrence
        # of that month. No year is given, so this is marked as not firm.
        for match in _MONTH_PATTERN.finditer(text):
            try:
                month = _GERMAN_MONTHS[match.group(1).lower()]
                deadline_date = _end_of_month(now.year, month)
                if deadline_date.date() < today:
                    deadline_date = _end_of_month(now.year + 1, month)
            except (KeyError, ValueError) as e:
                logger.debug("Failed to parse date: %s", e)
                continue

            deadlines.append(DeadlineExtraction(
                deadline_date=deadline_date,
                description=f"Frist: {match.group(0)}",
                confidence=0.6,
                source_text=_match_context(text, match),
                is_firm=False,
            ))

        return deadlines

    async def _extract_deadlines_llm(
        self,
        subject: str,
        body_preview: str,
    ) -> List[DeadlineExtraction]:
        """
        Extract deadlines using LLM.

        Expects one "DATUM|BESCHREIBUNG|VERBINDLICH" line per deadline, with
        an ISO date. Lines that do not parse are skipped. Returns an empty
        list on any failure or when the model answers KEINE_FRISTEN.
        """
        prompt = "\n".join([
            "Analysiere diese E-Mail und extrahiere alle genannten Fristen "
            "und Termine:",
            "",
            f"Betreff: {subject}",
            f"Inhalt: {body_preview}",
            "",
            "Liste alle Fristen im folgenden Format auf (eine pro Zeile):",
            "DATUM|BESCHREIBUNG|VERBINDLICH",
            "",
            "Beispiel:",
            "2025-01-15|Abgabe der Berichte|ja",
            "",
            "Wenn keine Fristen gefunden werden, antworte mit: KEINE_FRISTEN",
            "",
            "Antworte NUR im angegebenen Format.",
            "",
        ])

        try:
            result_text = await self._llm_complete(prompt, max_tokens=200)
            if result_text is None or "KEINE_FRISTEN" in result_text:
                return []

            deadlines = []
            for line in result_text.strip().split("\n"):
                parts = line.split("|")
                if len(parts) < 2:
                    continue
                try:
                    deadline_date = datetime.fromisoformat(parts[0].strip())
                except ValueError:
                    continue

                # The rest of the service works with naive local datetimes;
                # an aware value would raise TypeError when compared with
                # datetime.now() later on.
                if deadline_date.tzinfo is not None:
                    deadline_date = deadline_date.astimezone().replace(tzinfo=None)

                is_firm = parts[2].strip().lower() == "ja" if len(parts) > 2 else True

                deadlines.append(DeadlineExtraction(
                    deadline_date=deadline_date,
                    description=parts[1].strip(),
                    confidence=0.7,
                    source_text=line,
                    is_firm=is_firm,
                ))

            return deadlines
        except Exception as e:
            # Best effort: deadline extraction must never break the pipeline.
            logger.warning("LLM deadline extraction failed: %s", e)

        return []

    # =========================================================================
    # Email Category Classification
    # =========================================================================

    async def classify_category(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Tuple[EmailCategory, float]:
        """
        Classify email into a category.

        Args:
            subject: Email subject
            body_preview: Preview of the body
            sender_type: Already classified sender type

        Returns:
            Tuple of (category, confidence)
        """
        # Rule-based classification first
        category, confidence = self._classify_category_rules(
            subject, body_preview, sender_type
        )

        if confidence > 0.7:
            return category, confidence

        # Fall back to LLM
        return await self._classify_category_llm(subject, body_preview)

    def _classify_category_rules(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Tuple[EmailCategory, float]:
        """
        Rule-based category classification.

        Picks the category with the most keyword hits in subject + preview
        (first category wins a tie). Confidence is 0.4 + 0.15 per hit,
        capped at 0.9.
        """
        text = f"{subject} {body_preview}".lower()

        # Keywords for each category
        category_keywords = {
            EmailCategory.DIENSTLICH: [
                "dienstlich", "dienstanweisung", "erlass", "verordnung",
                "bescheid", "verfügung", "ministerium", "behörde",
            ],
            EmailCategory.PERSONAL: [
                "personalrat", "stellenausschreibung", "versetzung",
                "beurteilung", "dienstzeugnis", "krankmeldung", "elternzeit",
            ],
            EmailCategory.FINANZEN: [
                "budget", "haushalt", "etat", "abrechnung", "rechnung",
                "erstattung", "zuschuss", "fördermittel",
            ],
            EmailCategory.ELTERN: [
                "elternbrief", "elternabend", "schulkonferenz",
                "elternvertreter", "elternbeirat",
            ],
            EmailCategory.SCHUELER: [
                "schüler", "schülerin", "zeugnis", "klasse", "unterricht",
                "prüfung", "klassenfahrt", "schulpflicht",
            ],
            EmailCategory.FORTBILDUNG: [
                "fortbildung", "seminar", "workshop", "schulung",
                "weiterbildung", "nlq", "didaktik",
            ],
            EmailCategory.VERANSTALTUNG: [
                "einladung", "veranstaltung", "termin", "konferenz",
                "sitzung", "tagung", "feier",
            ],
            EmailCategory.SICHERHEIT: [
                "sicherheit", "notfall", "brandschutz", "evakuierung",
                "hygiene", "corona", "infektionsschutz",
            ],
            EmailCategory.TECHNIK: [
                "it", "software", "computer", "netzwerk", "login",
                "passwort", "digitalisierung", "iserv",
            ],
            EmailCategory.NEWSLETTER: [
                "newsletter", "rundschreiben", "info-mail", "mitteilung",
            ],
            EmailCategory.WERBUNG: [
                "angebot", "rabatt", "aktion", "werbung", "abonnement",
            ],
        }

        best_category = EmailCategory.SONSTIGES
        best_score = 0.0

        for category, keywords in category_keywords.items():
            score = _count_keyword_hits(text, keywords)
            if score > best_score:
                best_score = score
                best_category = category

        # Adjust based on sender type: mail from a state authority without
        # any keyword hit is treated as official business.
        if (
            self._is_state_authority(sender_type)
            and best_category == EmailCategory.SONSTIGES
        ):
            best_category = EmailCategory.DIENSTLICH
            best_score = 2

        # Convert score to confidence
        confidence = min(0.9, 0.4 + (best_score * 0.15))

        return best_category, confidence

    async def _classify_category_llm(
        self,
        subject: str,
        body_preview: str,
    ) -> Tuple[EmailCategory, float]:
        """
        LLM-based category classification.

        Expects "kategorie|konfidenz". Returns (SONSTIGES, 0.5) on any
        failure or when the category name is not an EmailCategory value.
        """
        categories = ", ".join(c.value for c in EmailCategory)

        prompt = "\n".join([
            "Klassifiziere diese E-Mail in EINE Kategorie:",
            "",
            f"Betreff: {subject}",
            f"Inhalt: {body_preview[:500]}",
            "",
            f"Kategorien: {categories}",
            "",
            "Antworte NUR mit dem Kategorienamen und einer Konfidenz (0.0-1.0):",
            "Format: kategorie|konfidenz",
            "",
        ])

        try:
            result = await self._llm_complete(
                prompt, max_tokens=50, default="sonstiges|0.5"
            )
            if result is not None:
                parts = result.strip().split("|")
                if len(parts) >= 2:
                    category_str = parts[0].strip().lower()
                    confidence = _parse_confidence(parts[1])
                    # EmailCategory(...) raises ValueError for unknown names;
                    # that lands in the handler below and yields the fallback.
                    return EmailCategory(category_str), confidence
        except ValueError:
            # Unknown category name or unparsable confidence: use the fallback.
            pass
        except Exception as e:
            logger.warning("LLM category classification failed: %s", e)

        return EmailCategory.SONSTIGES, 0.5

    # =========================================================================
    # Full Analysis Pipeline
    # =========================================================================

    async def analyze_email(
        self,
        email_id: str,
        sender_email: str,
        sender_name: Optional[str],
        subject: str,
        body_text: Optional[str],
        body_preview: Optional[str],
    ) -> EmailAnalysisResult:
        """
        Run full analysis pipeline on an email.

        Args:
            email_id: Database ID of the email
            sender_email: Sender's email address
            sender_name: Sender's display name
            subject: Email subject
            body_text: Full body text
            body_preview: Preview text

        Returns:
            Complete analysis result (also persisted via
            update_email_ai_analysis)
        """
        # 1. Classify sender
        sender_classification = await self.classify_sender(
            sender_email, sender_name, subject, body_preview
        )
        sender_type = sender_classification.sender_type

        # 2. Extract deadlines
        deadlines = await self.extract_deadlines(subject, body_text or "")

        # 3. Classify category
        category, category_confidence = await self.classify_category(
            subject, body_preview or "", sender_type
        )

        # 4. Determine priority
        suggested_priority = get_priority_from_sender_type(sender_type)

        # Upgrade priority if deadlines are found. The deadline only ever
        # raises the priority; it never lowers what the sender type gave.
        if deadlines:
            nearest_deadline = min(d.deadline_date for d in deadlines)
            days_until = (nearest_deadline - datetime.now()).days

            if days_until <= 1:
                suggested_priority = TaskPriority.URGENT
            elif days_until <= 3:
                suggested_priority = self._raise_priority(
                    suggested_priority, TaskPriority.HIGH
                )
            elif days_until <= 7:
                suggested_priority = self._raise_priority(
                    suggested_priority, TaskPriority.MEDIUM
                )

        # 5. Generate summary (optional, can be expensive)
        summary = None
        # Could add LLM summary generation here

        # 6. Determine if task should be auto-created
        auto_create_task = bool(deadlines) or self._is_state_authority(sender_type)

        # 7. Store analysis in database
        await update_email_ai_analysis(
            email_id=email_id,
            category=category.value,
            sender_type=sender_type.value,
            sender_authority_name=sender_classification.authority_name,
            detected_deadlines=[
                {
                    "date": d.deadline_date.isoformat(),
                    "description": d.description,
                    "is_firm": d.is_firm,
                }
                for d in deadlines
            ],
            suggested_priority=suggested_priority.value,
            ai_summary=summary,
        )

        return EmailAnalysisResult(
            email_id=email_id,
            category=category,
            category_confidence=category_confidence,
            sender_classification=sender_classification,
            deadlines=deadlines,
            suggested_priority=suggested_priority,
            summary=summary,
            suggested_actions=[],
            auto_create_task=auto_create_task,
        )

    # =========================================================================
    # Response Suggestions
    # =========================================================================

    async def suggest_response(
        self,
        subject: str,
        body_text: str,
        sender_type: SenderType,
        category: EmailCategory,
    ) -> List[ResponseSuggestion]:
        """
        Generate response suggestions for an email.

        Args:
            subject: Original email subject
            body_text: Original email body
            sender_type: Classified sender type
            category: Classified category

        Returns:
            List of response suggestions: matching templates first, then the
            LLM-generated one if available
        """
        suggestions = []

        # Add standard templates based on sender type and category
        if self._is_state_authority(sender_type):
            suggestions.append(ResponseSuggestion(
                template_type="acknowledgment",
                subject=f"Re: {subject}",
                body=_ACKNOWLEDGMENT_BODY,
                confidence=0.8,
            ))

        if category == EmailCategory.ELTERN:
            suggestions.append(ResponseSuggestion(
                template_type="parent_response",
                subject=f"Re: {subject}",
                body=_PARENT_RESPONSE_BODY,
                confidence=0.7,
            ))

        # Add LLM-generated suggestion (first 500 chars of the body only)
        try:
            llm_suggestion = await self._generate_response_llm(
                subject, (body_text or "")[:500], sender_type
            )
            if llm_suggestion:
                suggestions.append(llm_suggestion)
        except Exception as e:
            logger.warning("LLM response generation failed: %s", e)

        return suggestions

    async def _generate_response_llm(
        self,
        subject: str,
        body_preview: str,
        sender_type: SenderType,
    ) -> Optional[ResponseSuggestion]:
        """
        Generate a response suggestion using LLM.

        Returns None on any failure or when the model returns empty text.
        """
        sender_desc = {
            SenderType.KULTUSMINISTERIUM: "dem Kultusministerium",
            SenderType.LANDESSCHULBEHOERDE: "der Landesschulbehörde",
            SenderType.RLSB: "dem RLSB",
            SenderType.ELTERNVERTRETER: "einem Elternvertreter",
        }.get(sender_type, "einem Absender")

        prompt = "\n".join([
            "Du bist eine Schulleiterin in Niedersachsen. Formuliere eine "
            "professionelle, kurze Antwort auf diese E-Mail von "
            f"{sender_desc}:",
            "",
            f"Betreff: {subject}",
            f"Inhalt: {body_preview}",
            "",
            "Die Antwort sollte:",
            "- Höflich und formell sein",
            "- Den Eingang bestätigen",
            "- Eine konkrete nächste Aktion nennen oder um Klärung bitten",
            "",
            'Antworte NUR mit dem Antworttext (ohne Betreffzeile, ohne "Betreff:").',
            "",
        ])

        try:
            text = await self._llm_complete(prompt, max_tokens=300)
            body = text.strip() if text else ""
            if body:
                return ResponseSuggestion(
                    template_type="ai_generated",
                    subject=f"Re: {subject}",
                    body=body,
                    confidence=0.6,
                )
        except Exception as e:
            logger.warning("LLM response generation failed: %s", e)

        return None


# Global instance
_ai_service: Optional[AIEmailService] = None


def get_ai_email_service() -> AIEmailService:
    """Get or create the global AIEmailService instance."""
    global _ai_service

    if _ai_service is None:
        _ai_service = AIEmailService()

    return _ai_service