Files
breakpilot-lehrer/klausur-service/backend/mail/ai_sender.py
Benjamin Admin 34da9f4cda [split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files):
- llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6)
- messenger_api.py (840 → 5), print_generator.py (824 → 5)
- unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4)
- llm_gateway/routes/edu_search_seeds.py (710 → 4)

klausur-service (12 files):
- ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4)
- legal_corpus_api.py (790 → 4), page_crop.py (758 → 3)
- mail/ai_service.py (747 → 4), github_crawler.py (767 → 3)
- trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4)
- dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4)

website (6 pages):
- audit-checklist (867 → 8), content (806 → 6)
- screen-flow (790 → 4), scraper (789 → 5)
- zeugnisse (776 → 5), modules (745 → 4)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 08:01:18 +02:00

135 lines
4.2 KiB
Python

"""
AI Email - Sender Classification
Domain-based and LLM-based sender classification for emails.
Extracted from ai_service.py to keep files under 500 LOC.
"""
import logging
import math
import os
from typing import Optional

import httpx

from .models import (
    SenderClassification,
    SenderType,
    classify_sender_by_domain,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# LLM Gateway configuration: base URL of the gateway, read from the
# LLM_GATEWAY_URL environment variable; falls back to http://localhost:8090
# when the variable is not set.
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
async def classify_sender(
    http_client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str] = None,
    subject: Optional[str] = None,
    body_preview: Optional[str] = None,
) -> SenderClassification:
    """
    Determine what kind of sender an email comes from.

    The domain lookup is consulted first; only when it yields nothing
    usable is the LLM-based classifier asked.

    Args:
        http_client: Async HTTP client handed on to the LLM classifier.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, if known.
        subject: Subject line of the email, if known.
        body_preview: Leading part of the email body, if available.

    Returns:
        The domain-based classification when it is truthy, otherwise the
        result of the LLM-based classification.
    """
    by_domain = classify_sender_by_domain(sender_email)
    if not by_domain:
        # No domain match: delegate to the LLM-backed classifier.
        return await _classify_sender_llm(
            client=http_client,
            sender_email=sender_email,
            sender_name=sender_name,
            subject=subject,
            body_preview=body_preview,
        )
    return by_domain
def _build_sender_prompt(
    sender_email: str,
    sender_name: Optional[str],
    subject: Optional[str],
    body_preview: Optional[str],
) -> str:
    """Build the German classification prompt that is sent to the LLM gateway."""
    name_text = sender_name or "Nicht angegeben"
    subject_text = subject or "Nicht angegeben"
    # Only the first 200 characters of the body go into the prompt.
    preview_text = body_preview[:200] if body_preview else "Nicht verfuegbar"
    # The continuation lines stay at column 0 on purpose: they are part of
    # the string sent to the model and must not gain leading whitespace.
    return f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:
Absender E-Mail: {sender_email}
Absender Name: {name_text}
Betreff: {subject_text}
Vorschau: {preview_text}
Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehoerde
- rlsb: Regionales Landesamt fuer Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersaechsischer Bildungsserver
- schultraeger: Schultraeger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen
Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begruendung
"""


def _parse_sender_reply(result_text: str) -> SenderClassification:
    """
    Turn a ``kategorie|konfidenz|begruendung`` reply into a classification.

    Args:
        result_text: Raw text returned by the LLM gateway.

    Returns:
        A classification flagged as AI-classified. Category names that are
        not in the mapping are reported as ``SenderType.UNBEKANNT``; the
        confidence is clamped to the range 0.0-1.0.

    Raises:
        ValueError: If the reply has fewer than two ``|``-separated fields
            or the confidence field is not a usable number (including NaN).
    """
    fields = result_text.strip().split("|")
    if len(fields) < 2:
        raise ValueError(f"unexpected LLM reply format: {result_text!r}")

    # The prompt's own example shows the category in quotes, so a model may
    # echo them back; drop surrounding quotes/backticks before the lookup.
    wrappers = "\"'`"
    label = fields[0].strip().strip(wrappers).strip().lower()
    # Accept a decimal comma ("0,9") as well as a decimal point.
    confidence_text = fields[1].strip().strip(wrappers).strip().replace(",", ".")
    confidence = float(confidence_text)
    if math.isnan(confidence):
        # float() accepts "nan", and NaN would pass through min()/max()
        # unclamped, so reject it explicitly.
        raise ValueError("LLM returned NaN as confidence")

    type_mapping = {
        "kultusministerium": SenderType.KULTUSMINISTERIUM,
        "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
        "rlsb": SenderType.RLSB,
        "schulamt": SenderType.SCHULAMT,
        "nibis": SenderType.NIBIS,
        "schultraeger": SenderType.SCHULTRAEGER,
        "elternvertreter": SenderType.ELTERNVERTRETER,
        "gewerkschaft": SenderType.GEWERKSCHAFT,
        "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
        "privatperson": SenderType.PRIVATPERSON,
        "unternehmen": SenderType.UNTERNEHMEN,
    }
    return SenderClassification(
        sender_type=type_mapping.get(label, SenderType.UNBEKANNT),
        confidence=min(max(confidence, 0.0), 1.0),
        domain_matched=False,
        ai_classified=True,
    )


async def _classify_sender_llm(
    client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str],
    subject: Optional[str],
    body_preview: Optional[str],
) -> SenderClassification:
    """
    Classify sender using LLM.

    Posts a classification prompt to ``{LLM_GATEWAY_URL}/api/v1/inference``
    and parses the ``response`` field of the JSON answer.

    Args:
        client: Async HTTP client used for the gateway request.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, if known.
        subject: Subject line of the email, if known.
        body_preview: Leading part of the email body, if available.

    Returns:
        The parsed AI classification on success. On any failure (request
        error, non-200 status, unparseable reply) a fallback classification
        of ``SenderType.UNBEKANNT`` with confidence 0.3 and
        ``ai_classified=False`` is returned; this function does not raise.
    """
    try:
        prompt = _build_sender_prompt(
            sender_email, sender_name, subject, body_preview
        )
        response = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 100,
            },
        )
        if response.status_code == 200:
            data = response.json()
            result_text = data.get("response", "unbekannt|0.5|")
            return _parse_sender_reply(result_text)
        # Previously a non-200 answer fell through without any trace.
        logger.warning(
            "LLM sender classification returned HTTP %s", response.status_code
        )
    except Exception as e:
        # Deliberately broad: classification is best-effort and must never
        # break mail processing; every failure degrades to the fallback.
        logger.warning("LLM sender classification failed: %s", e)

    # Default fallback
    return SenderClassification(
        sender_type=SenderType.UNBEKANNT,
        confidence=0.3,
        domain_matched=False,
        ai_classified=False,
    )