backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
135 lines
4.2 KiB
Python
135 lines
4.2 KiB
Python
"""
|
|
AI Email - Sender Classification
|
|
|
|
Domain-based and LLM-based sender classification for emails.
|
|
|
|
Extracted from ai_service.py to keep files under 500 LOC.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
|
|
from .models import (
|
|
SenderType,
|
|
SenderClassification,
|
|
classify_sender_by_domain,
|
|
)
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# LLM Gateway configuration
|
|
# Base URL of the LLM gateway. Read from the LLM_GATEWAY_URL environment
# variable; falls back to http://localhost:8090 when the variable is unset.
LLM_GATEWAY_URL = os.getenv("LLM_GATEWAY_URL", "http://localhost:8090")
|
|
|
|
|
|
async def classify_sender(
    http_client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str] = None,
    subject: Optional[str] = None,
    body_preview: Optional[str] = None,
) -> SenderClassification:
    """
    Determine what kind of sender an email comes from.

    The sender address is first checked with ``classify_sender_by_domain``;
    a truthy result from that lookup is returned unchanged. Otherwise the
    address, display name, subject and body preview are handed to the
    LLM-based classifier.

    Args:
        http_client: Async HTTP client passed on to the LLM classifier.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, if known.
        subject: Subject line of the email, if known.
        body_preview: Leading part of the email body, if available.

    Returns:
        The domain-based classification when one is found, else the result
        of the LLM-based classification.
    """
    by_domain = classify_sender_by_domain(sender_email)
    if not by_domain:
        # No domain match: let the LLM decide from the message details.
        return await _classify_sender_llm(
            http_client,
            sender_email,
            sender_name,
            subject,
            body_preview,
        )

    # Domain match is the fast path and wins over the LLM.
    return by_domain
|
|
|
|
|
|
def _parse_llm_sender_reply(reply_text: str) -> "Optional[tuple[str, float]]":
    """
    Parse a ``kategorie|konfidenz|begruendung`` reply from the LLM.

    Args:
        reply_text: Raw text returned by the LLM gateway.

    Returns:
        ``(label, confidence)`` with a lower-cased label and a confidence
        clamped to ``[0.0, 1.0]``, or ``None`` when the reply has fewer than
        two ``|``-separated fields or the confidence is not a usable number.
    """
    import math  # local import: only this helper needs it

    def _clean(field: str) -> str:
        # The prompt's own example shows the category in quotes, so models
        # tend to echo quotes/backticks/markdown emphasis around fields.
        return field.strip().strip("\"'`*").strip()

    fields = reply_text.strip().split("|")
    if len(fields) < 2:
        return None

    label = _clean(fields[0]).lower()
    # The prompt is German; accept a decimal comma ("0,9") as well.
    raw_confidence = _clean(fields[1]).replace(",", ".")
    try:
        confidence = float(raw_confidence)
    except ValueError:
        return None

    # float() accepts "nan", and NaN would pass through min()/max() unclamped.
    if math.isnan(confidence):
        return None

    return label, min(max(confidence, 0.0), 1.0)


async def _classify_sender_llm(
    client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str],
    subject: Optional[str],
    body_preview: Optional[str],
) -> SenderClassification:
    """
    Classify the sender of an email by asking the LLM gateway.

    Posts a German-language prompt to ``{LLM_GATEWAY_URL}/api/v1/inference``
    and expects a reply of the form ``kategorie|konfidenz|kurze_begruendung``.
    Category names that are not in the known mapping are reported as
    ``SenderType.UNBEKANNT`` with the confidence given by the LLM.

    Args:
        client: Async HTTP client used for the gateway request.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, if known.
        subject: Subject line of the email, if known.
        body_preview: Leading part of the email body; only the first 200
            characters are included in the prompt.

    Returns:
        An AI-based classification (``ai_classified=True``) when the gateway
        answers with HTTP 200 and a parseable reply. In every other case
        (request error, non-200 status, unparseable reply) a default
        ``UNBEKANNT`` classification with confidence 0.3 and
        ``ai_classified=False`` is returned. Failures are logged as warnings;
        this function does not propagate them.
    """
    type_mapping = {
        "kultusministerium": SenderType.KULTUSMINISTERIUM,
        "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
        "rlsb": SenderType.RLSB,
        "schulamt": SenderType.SCHULAMT,
        "nibis": SenderType.NIBIS,
        "schultraeger": SenderType.SCHULTRAEGER,
        "elternvertreter": SenderType.ELTERNVERTRETER,
        "gewerkschaft": SenderType.GEWERKSCHAFT,
        "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
        "privatperson": SenderType.PRIVATPERSON,
        "unternehmen": SenderType.UNTERNEHMEN,
    }

    # Best-effort boundary: any failure while talking to the gateway or
    # decoding its answer must degrade to the default classification instead
    # of breaking the caller, hence the deliberately broad except below.
    try:
        prompt = f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:

Absender E-Mail: {sender_email}
Absender Name: {sender_name or "Nicht angegeben"}
Betreff: {subject or "Nicht angegeben"}
Vorschau: {body_preview[:200] if body_preview else "Nicht verfuegbar"}

Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehoerde
- rlsb: Regionales Landesamt fuer Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersaechsischer Bildungsserver
- schultraeger: Schultraeger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen

Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begruendung
"""

        response = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 100,
            },
        )

        parsed = None
        if response.status_code == 200:
            data = response.json()
            # A missing "response" key is treated as an explicit "unknown"
            # answer with medium confidence.
            reply = (
                data.get("response", "unbekannt|0.5|")
                if isinstance(data, dict)
                else None
            )
            if isinstance(reply, str):
                parsed = _parse_llm_sender_reply(reply)
            if parsed is None:
                logger.warning(
                    "LLM sender classification failed: unparseable gateway reply"
                )
        else:
            logger.warning(
                "LLM sender classification failed: gateway returned HTTP %s",
                response.status_code,
            )

        if parsed is not None:
            label, confidence = parsed
            return SenderClassification(
                sender_type=type_mapping.get(label, SenderType.UNBEKANNT),
                confidence=confidence,
                domain_matched=False,
                ai_classified=True,
            )

    except Exception as e:
        logger.warning("LLM sender classification failed: %s", e)

    # Default fallback
    return SenderClassification(
        sender_type=SenderType.UNBEKANNT,
        confidence=0.3,
        domain_matched=False,
        ai_classified=False,
    )
|