[split-required] Split 700-870 LOC files across all services

backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 08:01:18 +02:00
parent b6983ab1dc
commit 34da9f4cda
106 changed files with 16500 additions and 16947 deletions

View File

@@ -0,0 +1,269 @@
"""
AI Email - Category Classification and Response Suggestions
Rule-based and LLM-based email category classification,
plus response suggestion generation.
Extracted from ai_service.py to keep files under 500 LOC.
"""
import logging
import os
import re
from typing import Optional, List, Tuple

import httpx

from .models import (
    EmailCategory,
    SenderType,
    ResponseSuggestion,
)
logger = logging.getLogger(__name__)
# LLM Gateway configuration
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
async def classify_category(
    http_client: httpx.AsyncClient,
    subject: str,
    body_preview: str,
    sender_type: SenderType,
) -> Tuple[EmailCategory, float]:
    """Classify an email into a category.

    The cheap rule-based classifier runs first; the LLM is consulted
    only when the rule confidence is 0.7 or below.

    Args:
        http_client: Shared async client for the LLM gateway.
        subject: Email subject line.
        body_preview: Leading portion of the email body.
        sender_type: Previously classified sender type.

    Returns:
        Tuple of (category, confidence in [0, 1]).
    """
    rule_category, rule_confidence = _classify_category_rules(
        subject, body_preview, sender_type
    )
    if rule_confidence <= 0.7:
        # Rules were inconclusive — fall back to the LLM.
        return await _classify_category_llm(http_client, subject, body_preview)
    return rule_category, rule_confidence
def _classify_category_rules(
    subject: str,
    body_preview: str,
    sender_type: SenderType,
) -> Tuple[EmailCategory, float]:
    """Rule-based category classification via keyword matching.

    Counts whole-word keyword hits per category in the lowercased
    subject + body preview and picks the category with the most hits.
    Emails from school authorities default to DIENSTLICH when nothing
    else matches.

    Args:
        subject: Email subject line.
        body_preview: Leading portion of the email body.
        sender_type: Previously classified sender type.

    Returns:
        Tuple of (best category, confidence); confidence grows with the
        hit count and is capped at 0.9.
    """
    text = f"{subject} {body_preview}".lower()
    category_keywords = {
        EmailCategory.DIENSTLICH: [
            "dienstlich", "dienstanweisung", "erlass", "verordnung",
            "bescheid", "verfuegung", "ministerium", "behoerde"
        ],
        EmailCategory.PERSONAL: [
            "personalrat", "stellenausschreibung", "versetzung",
            "beurteilung", "dienstzeugnis", "krankmeldung", "elternzeit"
        ],
        EmailCategory.FINANZEN: [
            "budget", "haushalt", "etat", "abrechnung", "rechnung",
            "erstattung", "zuschuss", "foerdermittel"
        ],
        EmailCategory.ELTERN: [
            "elternbrief", "elternabend", "schulkonferenz",
            "elternvertreter", "elternbeirat"
        ],
        EmailCategory.SCHUELER: [
            "schueler", "schuelerin", "zeugnis", "klasse", "unterricht",
            "pruefung", "klassenfahrt", "schulpflicht"
        ],
        EmailCategory.FORTBILDUNG: [
            "fortbildung", "seminar", "workshop", "schulung",
            "weiterbildung", "nlq", "didaktik"
        ],
        EmailCategory.VERANSTALTUNG: [
            "einladung", "veranstaltung", "termin", "konferenz",
            "sitzung", "tagung", "feier"
        ],
        EmailCategory.SICHERHEIT: [
            "sicherheit", "notfall", "brandschutz", "evakuierung",
            "hygiene", "corona", "infektionsschutz"
        ],
        EmailCategory.TECHNIK: [
            "it", "software", "computer", "netzwerk", "login",
            "passwort", "digitalisierung", "iserv"
        ],
        EmailCategory.NEWSLETTER: [
            "newsletter", "rundschreiben", "info-mail", "mitteilung"
        ],
        EmailCategory.WERBUNG: [
            "angebot", "rabatt", "aktion", "werbung", "abonnement"
        ],
    }
    best_category = EmailCategory.SONSTIGES
    best_score = 0.0
    for category, keywords in category_keywords.items():
        # Whole-word matching: a plain `kw in text` substring test would
        # let the short keyword "it" match inside unrelated German words
        # ("mitteilung", "zeit", "bitte"), inflating the TECHNIK score
        # on virtually every email.
        score = sum(
            1 for kw in keywords
            if re.search(rf"\b{re.escape(kw)}\b", text)
        )
        if score > best_score:
            best_score = score
            best_category = category
    # Authority senders with no keyword signal are treated as official
    # correspondence with a moderate score.
    if sender_type in [SenderType.KULTUSMINISTERIUM, SenderType.LANDESSCHULBEHOERDE, SenderType.RLSB]:
        if best_category == EmailCategory.SONSTIGES:
            best_category = EmailCategory.DIENSTLICH
            best_score = 2
    # Map the hit count onto [0.4, 0.9].
    confidence = min(0.9, 0.4 + (best_score * 0.15))
    return best_category, confidence
async def _classify_category_llm(
    client: httpx.AsyncClient,
    subject: str,
    body_preview: str,
) -> Tuple[EmailCategory, float]:
    """Classify an email category via the LLM gateway.

    Falls back to (SONSTIGES, 0.5) on transport errors, non-200
    responses, unparseable replies, or unknown category names.
    """
    try:
        category_names = ", ".join(c.value for c in EmailCategory)
        prompt = f"""Klassifiziere diese E-Mail in EINE Kategorie:
Betreff: {subject}
Inhalt: {body_preview[:500]}
Kategorien: {category_names}
Antworte NUR mit dem Kategorienamen und einer Konfidenz (0.0-1.0):
Format: kategorie|konfidenz
"""
        reply = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 50,
            },
        )
        if reply.status_code == 200:
            raw = reply.json().get("response", "sonstiges|0.5")
            fields = raw.strip().split("|")
            if len(fields) >= 2:
                label = fields[0].strip().lower()
                score = float(fields[1].strip())
                try:
                    # Clamp the model's confidence into [0, 1].
                    return EmailCategory(label), min(max(score, 0.0), 1.0)
                except ValueError:
                    # Model returned a category name we don't know.
                    pass
    except Exception as e:
        logger.warning(f"LLM category classification failed: {e}")
    return EmailCategory.SONSTIGES, 0.5
async def suggest_response(
    http_client: httpx.AsyncClient,
    subject: str,
    body_text: str,
    sender_type: SenderType,
    category: EmailCategory,
) -> List[ResponseSuggestion]:
    """Generate response suggestions for an email.

    Static templates are selected by sender type and category; one
    LLM-generated suggestion is appended when the gateway call
    succeeds. LLM failures are logged and the static suggestions are
    returned anyway.
    """
    results: List[ResponseSuggestion] = []

    authority_senders = [
        SenderType.KULTUSMINISTERIUM,
        SenderType.LANDESSCHULBEHOERDE,
        SenderType.RLSB,
    ]
    if sender_type in authority_senders:
        # Formal acknowledgment for official correspondence.
        results.append(ResponseSuggestion(
            template_type="acknowledgment",
            subject=f"Re: {subject}",
            body="""Sehr geehrte Damen und Herren,
vielen Dank fuer Ihre Nachricht.
Ich bestaetige den Eingang und werde die Angelegenheit fristgerecht bearbeiten.
Mit freundlichen Gruessen""",
            confidence=0.8,
        ))

    if category == EmailCategory.ELTERN:
        # Fill-in-the-blank template for parent communication.
        results.append(ResponseSuggestion(
            template_type="parent_response",
            subject=f"Re: {subject}",
            body="""Liebe Eltern,
vielen Dank fuer Ihre Nachricht.
[Ihre Antwort hier]
Mit freundlichen Gruessen""",
            confidence=0.7,
        ))

    try:
        ai_suggestion = await _generate_response_llm(
            http_client, subject, body_text[:500], sender_type
        )
        if ai_suggestion:
            results.append(ai_suggestion)
    except Exception as e:
        logger.warning(f"LLM response generation failed: {e}")

    return results
async def _generate_response_llm(
    client: httpx.AsyncClient,
    subject: str,
    body_preview: str,
    sender_type: SenderType,
) -> Optional[ResponseSuggestion]:
    """Generate a response suggestion using the LLM gateway.

    Returns None when the gateway call fails, returns a non-200 status,
    or the model produces an empty reply.
    """
    try:
        # Human-readable sender description for the prompt.
        descriptions = {
            SenderType.KULTUSMINISTERIUM: "dem Kultusministerium",
            SenderType.LANDESSCHULBEHOERDE: "der Landesschulbehoerde",
            SenderType.RLSB: "dem RLSB",
            SenderType.ELTERNVERTRETER: "einem Elternvertreter",
        }
        sender_desc = descriptions.get(sender_type, "einem Absender")
        prompt = f"""Du bist eine Schulleiterin in Niedersachsen. Formuliere eine professionelle, kurze Antwort auf diese E-Mail von {sender_desc}:
Betreff: {subject}
Inhalt: {body_preview}
Die Antwort sollte:
- Hoeflich und formell sein
- Den Eingang bestaetigen
- Eine konkrete naechste Aktion nennen oder um Klaerung bitten
Antworte NUR mit dem Antworttext (ohne Betreffzeile, ohne "Betreff:").
"""
        reply = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 300,
            },
        )
        if reply.status_code == 200:
            suggestion_body = reply.json().get("response", "").strip()
            if suggestion_body:
                return ResponseSuggestion(
                    template_type="ai_generated",
                    subject=f"Re: {subject}",
                    body=suggestion_body,
                    confidence=0.6,
                )
    except Exception as e:
        logger.warning(f"LLM response generation failed: {e}")
    return None

View File

@@ -0,0 +1,184 @@
"""
AI Email - Deadline Extraction
Regex-based and LLM-based deadline extraction from email content.
Extracted from ai_service.py to keep files under 500 LOC.
"""
import os
import re
import logging
from typing import List
from datetime import datetime, timedelta
import httpx
from .models import DeadlineExtraction
logger = logging.getLogger(__name__)
# LLM Gateway configuration
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
async def extract_deadlines(
    http_client: httpx.AsyncClient,
    subject: str,
    body_text: str,
) -> List[DeadlineExtraction]:
    """Extract deadlines from email content.

    Cheap regex patterns run first; the LLM is consulted only when
    regex finds nothing and a body is available.

    Args:
        http_client: Shared async client for the LLM gateway.
        subject: Email subject line.
        body_text: Full email body (may be empty).

    Returns:
        List of extracted deadlines (possibly empty).
    """
    combined = f"{subject}\n{body_text}" if body_text else subject

    found: List[DeadlineExtraction] = []
    found.extend(_extract_deadlines_regex(combined))

    if not found and body_text:
        # Regex found nothing — let the LLM look at the first 1000 chars.
        found.extend(
            await _extract_deadlines_llm(http_client, subject, body_text[:1000])
        )
    return found
def _extract_deadlines_regex(text: str) -> List[DeadlineExtraction]:
    """Extract deadlines using regex patterns.

    Scans *text* for German deadline phrases. Absolute dates
    ("bis zum 15.01.2025") become firm deadlines with 0.85 confidence;
    relative spans ("innerhalb von 14 Tagen") become soft deadlines
    anchored at the current time with 0.7 confidence. Past absolute
    dates are skipped.
    """
    deadlines = []
    now = datetime.now()
    # German date patterns. Each entry is (regex, is_specific_date):
    # True = groups 1-3 are day/month/year, False = relative or vague.
    # NOTE: raw strings do not process \u escapes, but Python's `re`
    # parser itself interprets \uXXXX, so \u00e4 matches "ä".
    patterns = [
        # "bis zum 15.01.2025"
        (r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
        # "spaetestens am 15.01.2025"
        (r"sp\u00e4testens\s+(?:am\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
        # "Abgabetermin: 15.01.2025"
        (r"(?:Abgabe|Termin|Frist)[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
        # "innerhalb von 14 Tagen"
        (r"innerhalb\s+von\s+(\d+)\s+(?:Tagen|Wochen)", False),
        # "bis Ende Januar"
        (r"bis\s+(?:Ende\s+)?(Januar|Februar|M\u00e4rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)", False),
    ]
    for pattern, is_specific_date in patterns:
        matches = re.finditer(pattern, text, re.IGNORECASE)
        for match in matches:
            try:
                if is_specific_date:
                    day = int(match.group(1))
                    month = int(match.group(2))
                    year = int(match.group(3))
                    # Two-digit years are assumed to be 20xx.
                    if year < 100:
                        year += 2000
                    deadline_date = datetime(year, month, day)
                    # Skip deadlines that have already passed.
                    if deadline_date < now:
                        continue
                    # Capture ±50 chars around the match as context.
                    start = max(0, match.start() - 50)
                    end = min(len(text), match.end() + 50)
                    context = text[start:end].strip()
                    deadlines.append(DeadlineExtraction(
                        deadline_date=deadline_date,
                        description=f"Frist: {match.group(0)}",
                        confidence=0.85,
                        source_text=context,
                        is_firm=True,
                    ))
                else:
                    # Only the "innerhalb von N Tagen/Wochen" pattern is
                    # handled here. NOTE(review): the month-name pattern
                    # ("bis Ende Januar") matches but produces no
                    # deadline — apparently intentional, confirm.
                    if "Tagen" in pattern or "Wochen" in pattern:
                        days = int(match.group(1))
                        if "Wochen" in match.group(0).lower():
                            days *= 7
                        # Relative deadline anchored at the current time.
                        deadline_date = now + timedelta(days=days)
                        deadlines.append(DeadlineExtraction(
                            deadline_date=deadline_date,
                            description=f"Relative Frist: {match.group(0)}",
                            confidence=0.7,
                            source_text=match.group(0),
                            is_firm=False,
                        ))
            except (ValueError, IndexError) as e:
                # Invalid calendar date (e.g. 31.02.) or missing group.
                logger.debug(f"Failed to parse date: {e}")
                continue
    return deadlines
async def _extract_deadlines_llm(
    client: httpx.AsyncClient,
    subject: str,
    body_preview: str,
) -> List[DeadlineExtraction]:
    """Extract deadlines using the LLM gateway.

    Expects the model to answer one "DATUM|BESCHREIBUNG|VERBINDLICH"
    line per deadline (ISO date). Returns an empty list on transport
    errors, non-200 responses, or when the model reports no deadlines.
    """
    try:
        prompt = f"""Analysiere diese E-Mail und extrahiere alle genannten Fristen und Termine:
Betreff: {subject}
Inhalt: {body_preview}
Liste alle Fristen im folgenden Format auf (eine pro Zeile):
DATUM|BESCHREIBUNG|VERBINDLICH
Beispiel: 2025-01-15|Abgabe der Berichte|ja
Wenn keine Fristen gefunden werden, antworte mit: KEINE_FRISTEN
Antworte NUR im angegebenen Format.
"""
        reply = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 200,
            },
        )
        if reply.status_code == 200:
            answer = reply.json().get("response", "")
            if "KEINE_FRISTEN" in answer:
                return []
            extracted: List[DeadlineExtraction] = []
            for line in answer.strip().split("\n"):
                fields = line.split("|")
                if len(fields) < 2:
                    continue
                try:
                    parsed_date = datetime.fromisoformat(fields[0].strip())
                    # Third field ("ja"/"nein") marks firmness; firm by default.
                    firm = fields[2].strip().lower() == "ja" if len(fields) > 2 else True
                    extracted.append(DeadlineExtraction(
                        deadline_date=parsed_date,
                        description=fields[1].strip(),
                        confidence=0.7,
                        source_text=line,
                        is_firm=firm,
                    ))
                except (ValueError, IndexError):
                    # Skip malformed lines from the model.
                    continue
            return extracted
    except Exception as e:
        logger.warning(f"LLM deadline extraction failed: {e}")
    return []

View File

@@ -0,0 +1,134 @@
"""
AI Email - Sender Classification
Domain-based and LLM-based sender classification for emails.
Extracted from ai_service.py to keep files under 500 LOC.
"""
import os
import logging
from typing import Optional
import httpx
from .models import (
SenderType,
SenderClassification,
classify_sender_by_domain,
)
logger = logging.getLogger(__name__)
# LLM Gateway configuration
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
async def classify_sender(
    http_client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str] = None,
    subject: Optional[str] = None,
    body_preview: Optional[str] = None,
) -> SenderClassification:
    """Classify the sender of an email.

    Domain matching is tried first (fast, high confidence); when the
    domain is unknown the LLM classifier takes over.

    Args:
        http_client: Shared async client for the LLM gateway.
        sender_email: Sender's email address.
        sender_name: Optional display name.
        subject: Optional email subject.
        body_preview: Optional leading portion of the body.

    Returns:
        SenderClassification with type and confidence.
    """
    by_domain = classify_sender_by_domain(sender_email)
    if by_domain is not None:
        return by_domain
    # Domain unknown — ask the LLM.
    return await _classify_sender_llm(
        http_client, sender_email, sender_name, subject, body_preview
    )
async def _classify_sender_llm(
    client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str],
    subject: Optional[str],
    body_preview: Optional[str],
) -> SenderClassification:
    """Classify a sender via the LLM gateway.

    Returns a low-confidence UNBEKANNT classification when the gateway
    call fails, the response is non-200, or the model reply cannot be
    parsed.
    """
    try:
        prompt = f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:
Absender E-Mail: {sender_email}
Absender Name: {sender_name or "Nicht angegeben"}
Betreff: {subject or "Nicht angegeben"}
Vorschau: {body_preview[:200] if body_preview else "Nicht verfuegbar"}
Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehoerde
- rlsb: Regionales Landesamt fuer Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersaechsischer Bildungsserver
- schultraeger: Schultraeger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen
Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begruendung
"""
        reply = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 100,
            },
        )
        if reply.status_code == 200:
            answer = reply.json().get("response", "unbekannt|0.5|")
            fields = answer.strip().split("|")
            if len(fields) >= 2:
                label = fields[0].strip().lower()
                score = float(fields[1].strip())
                # Map the model's label onto the SenderType enum.
                label_to_type = {
                    "kultusministerium": SenderType.KULTUSMINISTERIUM,
                    "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
                    "rlsb": SenderType.RLSB,
                    "schulamt": SenderType.SCHULAMT,
                    "nibis": SenderType.NIBIS,
                    "schultraeger": SenderType.SCHULTRAEGER,
                    "elternvertreter": SenderType.ELTERNVERTRETER,
                    "gewerkschaft": SenderType.GEWERKSCHAFT,
                    "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
                    "privatperson": SenderType.PRIVATPERSON,
                    "unternehmen": SenderType.UNTERNEHMEN,
                }
                return SenderClassification(
                    sender_type=label_to_type.get(label, SenderType.UNBEKANNT),
                    confidence=min(max(score, 0.0), 1.0),
                    domain_matched=False,
                    ai_classified=True,
                )
    except Exception as e:
        logger.warning(f"LLM sender classification failed: {e}")
    # Default fallback when the gateway or parsing failed.
    return SenderClassification(
        sender_type=SenderType.UNBEKANNT,
        confidence=0.3,
        domain_matched=False,
        ai_classified=False,
    )

View File

@@ -1,18 +1,19 @@
"""
AI Email Analysis Service
AI Email Analysis Service — Barrel Re-export
KI-powered email analysis with:
- Sender classification (authority recognition)
- Deadline extraction
- Category classification
- Response suggestions
Split into:
- mail/ai_sender.py — Sender classification (domain + LLM)
- mail/ai_deadline.py — Deadline extraction (regex + LLM)
- mail/ai_category.py — Category classification + response suggestions
The AIEmailService class and get_ai_email_service() are defined here
to maintain the original public API.
"""
import os
import re
import logging
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime, timedelta
from typing import Optional, List, Tuple
from datetime import datetime
import httpx
from .models import (
@@ -23,17 +24,15 @@ from .models import (
DeadlineExtraction,
EmailAnalysisResult,
ResponseSuggestion,
KNOWN_AUTHORITIES_NI,
classify_sender_by_domain,
get_priority_from_sender_type,
)
from .mail_db import update_email_ai_analysis
from .ai_sender import classify_sender, LLM_GATEWAY_URL
from .ai_deadline import extract_deadlines
from .ai_category import classify_category, suggest_response
logger = logging.getLogger(__name__)
# LLM Gateway configuration
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
class AIEmailService:
"""
@@ -56,10 +55,6 @@ class AIEmailService:
self._http_client = httpx.AsyncClient(timeout=30.0)
return self._http_client
# =========================================================================
# Sender Classification
# =========================================================================
async def classify_sender(
self,
sender_email: str,
@@ -67,300 +62,20 @@ class AIEmailService:
subject: Optional[str] = None,
body_preview: Optional[str] = None,
) -> SenderClassification:
"""
Classify the sender of an email.
First tries domain matching, then falls back to LLM.
Args:
sender_email: Sender's email address
sender_name: Sender's display name
subject: Email subject
body_preview: First 200 chars of body
Returns:
SenderClassification with type and confidence
"""
# Try domain-based classification first (fast, high confidence)
domain_result = classify_sender_by_domain(sender_email)
if domain_result:
return domain_result
# Fall back to LLM classification
return await self._classify_sender_llm(
sender_email, sender_name, subject, body_preview
"""Classify the sender of an email."""
client = await self.get_http_client()
return await classify_sender(
client, sender_email, sender_name, subject, body_preview
)
async def _classify_sender_llm(
self,
sender_email: str,
sender_name: Optional[str],
subject: Optional[str],
body_preview: Optional[str],
) -> SenderClassification:
"""Classify sender using LLM."""
try:
client = await self.get_http_client()
prompt = f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:
Absender E-Mail: {sender_email}
Absender Name: {sender_name or "Nicht angegeben"}
Betreff: {subject or "Nicht angegeben"}
Vorschau: {body_preview[:200] if body_preview else "Nicht verfügbar"}
Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehörde
- rlsb: Regionales Landesamt für Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersächsischer Bildungsserver
- schultraeger: Schulträger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen
Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begründung
"""
response = await client.post(
f"{LLM_GATEWAY_URL}/api/v1/inference",
json={
"prompt": prompt,
"playbook": "mail_analysis",
"max_tokens": 100,
},
)
if response.status_code == 200:
data = response.json()
result_text = data.get("response", "unbekannt|0.5|")
# Parse response
parts = result_text.strip().split("|")
if len(parts) >= 2:
sender_type_str = parts[0].strip().lower()
confidence = float(parts[1].strip())
# Map to enum
type_mapping = {
"kultusministerium": SenderType.KULTUSMINISTERIUM,
"landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
"rlsb": SenderType.RLSB,
"schulamt": SenderType.SCHULAMT,
"nibis": SenderType.NIBIS,
"schultraeger": SenderType.SCHULTRAEGER,
"elternvertreter": SenderType.ELTERNVERTRETER,
"gewerkschaft": SenderType.GEWERKSCHAFT,
"fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
"privatperson": SenderType.PRIVATPERSON,
"unternehmen": SenderType.UNTERNEHMEN,
}
sender_type = type_mapping.get(sender_type_str, SenderType.UNBEKANNT)
return SenderClassification(
sender_type=sender_type,
confidence=min(max(confidence, 0.0), 1.0),
domain_matched=False,
ai_classified=True,
)
except Exception as e:
logger.warning(f"LLM sender classification failed: {e}")
# Default fallback
return SenderClassification(
sender_type=SenderType.UNBEKANNT,
confidence=0.3,
domain_matched=False,
ai_classified=False,
)
# =========================================================================
# Deadline Extraction
# =========================================================================
async def extract_deadlines(
self,
subject: str,
body_text: str,
) -> List[DeadlineExtraction]:
"""
Extract deadlines from email content.
Uses regex patterns first, then LLM for complex cases.
Args:
subject: Email subject
body_text: Email body text
Returns:
List of extracted deadlines
"""
deadlines = []
# Combine subject and body
full_text = f"{subject}\n{body_text}" if body_text else subject
# Try regex extraction first
regex_deadlines = self._extract_deadlines_regex(full_text)
deadlines.extend(regex_deadlines)
# If no regex matches, try LLM
if not deadlines and body_text:
llm_deadlines = await self._extract_deadlines_llm(subject, body_text[:1000])
deadlines.extend(llm_deadlines)
return deadlines
def _extract_deadlines_regex(self, text: str) -> List[DeadlineExtraction]:
"""Extract deadlines using regex patterns."""
deadlines = []
now = datetime.now()
# German date patterns
patterns = [
# "bis zum 15.01.2025"
(r"bis\s+(?:zum\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
# "spätestens am 15.01.2025"
(r"spätestens\s+(?:am\s+)?(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
# "Abgabetermin: 15.01.2025"
(r"(?:Abgabe|Termin|Frist)[:\s]+(\d{1,2})\.(\d{1,2})\.(\d{2,4})", True),
# "innerhalb von 14 Tagen"
(r"innerhalb\s+von\s+(\d+)\s+(?:Tagen|Wochen)", False),
# "bis Ende Januar"
(r"bis\s+(?:Ende\s+)?(Januar|Februar|März|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)", False),
]
for pattern, is_specific_date in patterns:
matches = re.finditer(pattern, text, re.IGNORECASE)
for match in matches:
try:
if is_specific_date:
day = int(match.group(1))
month = int(match.group(2))
year = int(match.group(3))
# Handle 2-digit years
if year < 100:
year += 2000
deadline_date = datetime(year, month, day)
# Skip past dates
if deadline_date < now:
continue
# Get surrounding context
start = max(0, match.start() - 50)
end = min(len(text), match.end() + 50)
context = text[start:end].strip()
deadlines.append(DeadlineExtraction(
deadline_date=deadline_date,
description=f"Frist: {match.group(0)}",
confidence=0.85,
source_text=context,
is_firm=True,
))
else:
# Relative dates (innerhalb von X Tagen)
if "Tagen" in pattern or "Wochen" in pattern:
days = int(match.group(1))
if "Wochen" in match.group(0).lower():
days *= 7
deadline_date = now + timedelta(days=days)
deadlines.append(DeadlineExtraction(
deadline_date=deadline_date,
description=f"Relative Frist: {match.group(0)}",
confidence=0.7,
source_text=match.group(0),
is_firm=False,
))
except (ValueError, IndexError) as e:
logger.debug(f"Failed to parse date: {e}")
continue
return deadlines
async def _extract_deadlines_llm(
self,
subject: str,
body_preview: str,
) -> List[DeadlineExtraction]:
"""Extract deadlines using LLM."""
try:
client = await self.get_http_client()
prompt = f"""Analysiere diese E-Mail und extrahiere alle genannten Fristen und Termine:
Betreff: {subject}
Inhalt: {body_preview}
Liste alle Fristen im folgenden Format auf (eine pro Zeile):
DATUM|BESCHREIBUNG|VERBINDLICH
Beispiel: 2025-01-15|Abgabe der Berichte|ja
Wenn keine Fristen gefunden werden, antworte mit: KEINE_FRISTEN
Antworte NUR im angegebenen Format.
"""
response = await client.post(
f"{LLM_GATEWAY_URL}/api/v1/inference",
json={
"prompt": prompt,
"playbook": "mail_analysis",
"max_tokens": 200,
},
)
if response.status_code == 200:
data = response.json()
result_text = data.get("response", "")
if "KEINE_FRISTEN" in result_text:
return []
deadlines = []
for line in result_text.strip().split("\n"):
parts = line.split("|")
if len(parts) >= 2:
try:
date_str = parts[0].strip()
deadline_date = datetime.fromisoformat(date_str)
description = parts[1].strip()
is_firm = parts[2].strip().lower() == "ja" if len(parts) > 2 else True
deadlines.append(DeadlineExtraction(
deadline_date=deadline_date,
description=description,
confidence=0.7,
source_text=line,
is_firm=is_firm,
))
except (ValueError, IndexError):
continue
return deadlines
except Exception as e:
logger.warning(f"LLM deadline extraction failed: {e}")
return []
# =========================================================================
# Email Category Classification
# =========================================================================
"""Extract deadlines from email content."""
client = await self.get_http_client()
return await extract_deadlines(client, subject, body_text)
async def classify_category(
self,
@@ -368,155 +83,9 @@ Antworte NUR im angegebenen Format.
body_preview: str,
sender_type: SenderType,
) -> Tuple[EmailCategory, float]:
"""
Classify email into a category.
Args:
subject: Email subject
body_preview: First 200 chars of body
sender_type: Already classified sender type
Returns:
Tuple of (category, confidence)
"""
# Rule-based classification first
category, confidence = self._classify_category_rules(subject, body_preview, sender_type)
if confidence > 0.7:
return category, confidence
# Fall back to LLM
return await self._classify_category_llm(subject, body_preview)
def _classify_category_rules(
self,
subject: str,
body_preview: str,
sender_type: SenderType,
) -> Tuple[EmailCategory, float]:
"""Rule-based category classification."""
text = f"{subject} {body_preview}".lower()
# Keywords for each category
category_keywords = {
EmailCategory.DIENSTLICH: [
"dienstlich", "dienstanweisung", "erlass", "verordnung",
"bescheid", "verfügung", "ministerium", "behörde"
],
EmailCategory.PERSONAL: [
"personalrat", "stellenausschreibung", "versetzung",
"beurteilung", "dienstzeugnis", "krankmeldung", "elternzeit"
],
EmailCategory.FINANZEN: [
"budget", "haushalt", "etat", "abrechnung", "rechnung",
"erstattung", "zuschuss", "fördermittel"
],
EmailCategory.ELTERN: [
"elternbrief", "elternabend", "schulkonferenz",
"elternvertreter", "elternbeirat"
],
EmailCategory.SCHUELER: [
"schüler", "schülerin", "zeugnis", "klasse", "unterricht",
"prüfung", "klassenfahrt", "schulpflicht"
],
EmailCategory.FORTBILDUNG: [
"fortbildung", "seminar", "workshop", "schulung",
"weiterbildung", "nlq", "didaktik"
],
EmailCategory.VERANSTALTUNG: [
"einladung", "veranstaltung", "termin", "konferenz",
"sitzung", "tagung", "feier"
],
EmailCategory.SICHERHEIT: [
"sicherheit", "notfall", "brandschutz", "evakuierung",
"hygiene", "corona", "infektionsschutz"
],
EmailCategory.TECHNIK: [
"it", "software", "computer", "netzwerk", "login",
"passwort", "digitalisierung", "iserv"
],
EmailCategory.NEWSLETTER: [
"newsletter", "rundschreiben", "info-mail", "mitteilung"
],
EmailCategory.WERBUNG: [
"angebot", "rabatt", "aktion", "werbung", "abonnement"
],
}
best_category = EmailCategory.SONSTIGES
best_score = 0.0
for category, keywords in category_keywords.items():
score = sum(1 for kw in keywords if kw in text)
if score > best_score:
best_score = score
best_category = category
# Adjust based on sender type
if sender_type in [SenderType.KULTUSMINISTERIUM, SenderType.LANDESSCHULBEHOERDE, SenderType.RLSB]:
if best_category == EmailCategory.SONSTIGES:
best_category = EmailCategory.DIENSTLICH
best_score = 2
# Convert score to confidence
confidence = min(0.9, 0.4 + (best_score * 0.15))
return best_category, confidence
async def _classify_category_llm(
self,
subject: str,
body_preview: str,
) -> Tuple[EmailCategory, float]:
"""LLM-based category classification."""
try:
client = await self.get_http_client()
categories = ", ".join([c.value for c in EmailCategory])
prompt = f"""Klassifiziere diese E-Mail in EINE Kategorie:
Betreff: {subject}
Inhalt: {body_preview[:500]}
Kategorien: {categories}
Antworte NUR mit dem Kategorienamen und einer Konfidenz (0.0-1.0):
Format: kategorie|konfidenz
"""
response = await client.post(
f"{LLM_GATEWAY_URL}/api/v1/inference",
json={
"prompt": prompt,
"playbook": "mail_analysis",
"max_tokens": 50,
},
)
if response.status_code == 200:
data = response.json()
result = data.get("response", "sonstiges|0.5")
parts = result.strip().split("|")
if len(parts) >= 2:
category_str = parts[0].strip().lower()
confidence = float(parts[1].strip())
try:
category = EmailCategory(category_str)
return category, min(max(confidence, 0.0), 1.0)
except ValueError:
pass
except Exception as e:
logger.warning(f"LLM category classification failed: {e}")
return EmailCategory.SONSTIGES, 0.5
# =========================================================================
# Full Analysis Pipeline
# =========================================================================
"""Classify email into a category."""
client = await self.get_http_client()
return await classify_category(client, subject, body_preview, sender_type)
async def analyze_email(
self,
@@ -527,20 +96,7 @@ Format: kategorie|konfidenz
body_text: Optional[str],
body_preview: Optional[str],
) -> EmailAnalysisResult:
"""
Run full analysis pipeline on an email.
Args:
email_id: Database ID of the email
sender_email: Sender's email address
sender_name: Sender's display name
subject: Email subject
body_text: Full body text
body_preview: Preview text
Returns:
Complete analysis result
"""
"""Run full analysis pipeline on an email."""
# 1. Classify sender
sender_classification = await self.classify_sender(
sender_email, sender_name, subject, body_preview
@@ -569,8 +125,8 @@ Format: kategorie|konfidenz
elif days_until <= 7:
suggested_priority = max(suggested_priority, TaskPriority.MEDIUM)
# 5. Generate summary (optional, can be expensive)
summary = None # Could add LLM summary generation here
# 5. Summary (optional)
summary = None
# 6. Determine if task should be auto-created
auto_create_task = (
@@ -612,10 +168,6 @@ Format: kategorie|konfidenz
auto_create_task=auto_create_task,
)
# =========================================================================
# Response Suggestions
# =========================================================================
async def suggest_response(
self,
subject: str,
@@ -623,114 +175,11 @@ Format: kategorie|konfidenz
sender_type: SenderType,
category: EmailCategory,
) -> List[ResponseSuggestion]:
"""
Generate response suggestions for an email.
Args:
subject: Original email subject
body_text: Original email body
sender_type: Classified sender type
category: Classified category
Returns:
List of response suggestions
"""
suggestions = []
# Add standard templates based on sender type and category
if sender_type in [SenderType.KULTUSMINISTERIUM, SenderType.LANDESSCHULBEHOERDE, SenderType.RLSB]:
suggestions.append(ResponseSuggestion(
template_type="acknowledgment",
subject=f"Re: {subject}",
body="""Sehr geehrte Damen und Herren,
vielen Dank für Ihre Nachricht.
Ich bestätige den Eingang und werde die Angelegenheit fristgerecht bearbeiten.
Mit freundlichen Grüßen""",
confidence=0.8,
))
if category == EmailCategory.ELTERN:
suggestions.append(ResponseSuggestion(
template_type="parent_response",
subject=f"Re: {subject}",
body="""Liebe Eltern,
vielen Dank für Ihre Nachricht.
[Ihre Antwort hier]
Mit freundlichen Grüßen""",
confidence=0.7,
))
# Add LLM-generated suggestion
try:
llm_suggestion = await self._generate_response_llm(subject, body_text[:500], sender_type)
if llm_suggestion:
suggestions.append(llm_suggestion)
except Exception as e:
logger.warning(f"LLM response generation failed: {e}")
return suggestions
async def _generate_response_llm(
self,
subject: str,
body_preview: str,
sender_type: SenderType,
) -> Optional[ResponseSuggestion]:
"""Generate a response suggestion using LLM."""
try:
client = await self.get_http_client()
sender_desc = {
SenderType.KULTUSMINISTERIUM: "dem Kultusministerium",
SenderType.LANDESSCHULBEHOERDE: "der Landesschulbehörde",
SenderType.RLSB: "dem RLSB",
SenderType.ELTERNVERTRETER: "einem Elternvertreter",
}.get(sender_type, "einem Absender")
prompt = f"""Du bist eine Schulleiterin in Niedersachsen. Formuliere eine professionelle, kurze Antwort auf diese E-Mail von {sender_desc}:
Betreff: {subject}
Inhalt: {body_preview}
Die Antwort sollte:
- Höflich und formell sein
- Den Eingang bestätigen
- Eine konkrete nächste Aktion nennen oder um Klärung bitten
Antworte NUR mit dem Antworttext (ohne Betreffzeile, ohne "Betreff:").
"""
response = await client.post(
f"{LLM_GATEWAY_URL}/api/v1/inference",
json={
"prompt": prompt,
"playbook": "mail_analysis",
"max_tokens": 300,
},
)
if response.status_code == 200:
data = response.json()
body = data.get("response", "").strip()
if body:
return ResponseSuggestion(
template_type="ai_generated",
subject=f"Re: {subject}",
body=body,
confidence=0.6,
)
except Exception as e:
logger.warning(f"LLM response generation failed: {e}")
return None
"""Generate response suggestions for an email."""
client = await self.get_http_client()
return await suggest_response(
client, subject, body_text, sender_type, category
)
# Global instance