[split-required] Split 700-870 LOC files across all services
backend-lehrer (11 files): - llm_gateway/routes/schools.py (867 → 5), recording_api.py (848 → 6) - messenger_api.py (840 → 5), print_generator.py (824 → 5) - unit_analytics_api.py (751 → 5), classroom/routes/context.py (726 → 4) - llm_gateway/routes/edu_search_seeds.py (710 → 4) klausur-service (12 files): - ocr_labeling_api.py (845 → 4), metrics_db.py (833 → 4) - legal_corpus_api.py (790 → 4), page_crop.py (758 → 3) - mail/ai_service.py (747 → 4), github_crawler.py (767 → 3) - trocr_service.py (730 → 4), full_compliance_pipeline.py (723 → 4) - dsfa_rag_api.py (715 → 4), ocr_pipeline_auto.py (705 → 4) website (6 pages): - audit-checklist (867 → 8), content (806 → 6) - screen-flow (790 → 4), scraper (789 → 5) - zeugnisse (776 → 5), modules (745 → 4) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
134
klausur-service/backend/mail/ai_sender.py
Normal file
134
klausur-service/backend/mail/ai_sender.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
AI Email - Sender Classification
|
||||
|
||||
Domain-based and LLM-based sender classification for emails.
|
||||
|
||||
Extracted from ai_service.py to keep files under 500 LOC.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from .models import (
|
||||
SenderType,
|
||||
SenderClassification,
|
||||
classify_sender_by_domain,
|
||||
)
|
||||
|
||||
# Module-scoped logger; its name follows this module's import path.
logger = logging.getLogger(__name__)

# Base URL of the LLM gateway. Read once at import time from the
# LLM_GATEWAY_URL environment variable, with a local default.
LLM_GATEWAY_URL = os.environ.get("LLM_GATEWAY_URL", "http://localhost:8090")
|
||||
|
||||
|
||||
async def classify_sender(
    http_client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str] = None,
    subject: Optional[str] = None,
    body_preview: Optional[str] = None,
) -> SenderClassification:
    """
    Determine what kind of sender an email came from.

    The sender address is first passed to ``classify_sender_by_domain``.
    Only when that yields no (truthy) result is the LLM-based classifier
    consulted, using the name, subject and body preview as extra context.

    Args:
        http_client: Async HTTP client handed to the LLM classifier.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, if known.
        subject: Subject line of the email, if known.
        body_preview: Leading part of the email body, if available.

    Returns:
        The domain-based classification when there is one, otherwise the
        result of the LLM-based classification.
    """
    by_domain = classify_sender_by_domain(sender_email)

    if not by_domain:
        # No domain match: ask the LLM instead.
        return await _classify_sender_llm(
            http_client,
            sender_email,
            sender_name,
            subject,
            body_preview,
        )

    # Domain lookup succeeded; no LLM round-trip needed.
    return by_domain
|
||||
|
||||
|
||||
async def _classify_sender_llm(
    client: httpx.AsyncClient,
    sender_email: str,
    sender_name: Optional[str],
    subject: Optional[str],
    body_preview: Optional[str],
) -> SenderClassification:
    """Classify the sender by asking the LLM gateway.

    Builds a German prompt from the sender details, POSTs it to
    ``{LLM_GATEWAY_URL}/api/v1/inference`` (playbook ``mail_analysis``,
    ``max_tokens`` 100) and parses a reply of the form
    ``kategorie|konfidenz|kurze_begruendung``.

    This is best-effort: any failure (request error, non-200 status,
    malformed reply, non-numeric or non-finite confidence) is logged and
    the default classification is returned instead of raising.

    Args:
        client: Async HTTP client used for the gateway request.
        sender_email: Address of the sender.
        sender_name: Display name of the sender, or ``None``.
        subject: Subject line of the email, or ``None``.
        body_preview: Leading part of the body; only the first 200
            characters are put into the prompt.

    Returns:
        A ``SenderClassification`` with ``ai_classified=True`` and the
        confidence clamped to ``[0.0, 1.0]`` when the reply could be
        parsed (unrecognised category names map to
        ``SenderType.UNBEKANNT``); otherwise the fallback
        ``SenderType.UNBEKANNT`` with confidence ``0.3`` and
        ``ai_classified=False``.
    """
    import math  # local import: only needed for the finiteness check below

    try:
        prompt = f"""Analysiere den Absender dieser E-Mail und klassifiziere ihn:

Absender E-Mail: {sender_email}
Absender Name: {sender_name or "Nicht angegeben"}
Betreff: {subject or "Nicht angegeben"}
Vorschau: {body_preview[:200] if body_preview else "Nicht verfuegbar"}

Klassifiziere den Absender in EINE der folgenden Kategorien:
- kultusministerium: Kultusministerium/Bildungsministerium
- landesschulbehoerde: Landesschulbehoerde
- rlsb: Regionales Landesamt fuer Schule und Bildung
- schulamt: Schulamt
- nibis: Niedersaechsischer Bildungsserver
- schultraeger: Schultraeger/Kommune
- elternvertreter: Elternvertreter/Elternrat
- gewerkschaft: Gewerkschaft (GEW, VBE, etc.)
- fortbildungsinstitut: Fortbildungsinstitut (NLQ, etc.)
- privatperson: Privatperson
- unternehmen: Unternehmen/Firma
- unbekannt: Nicht einzuordnen

Antworte NUR mit dem Kategorienamen (z.B. "kultusministerium") und einer Konfidenz von 0.0 bis 1.0.
Format: kategorie|konfidenz|kurze_begruendung
"""

        response = await client.post(
            f"{LLM_GATEWAY_URL}/api/v1/inference",
            json={
                "prompt": prompt,
                "playbook": "mail_analysis",
                "max_tokens": 100,
            },
        )

        if response.status_code == 200:
            data = response.json()
            result_text = data.get("response", "unbekannt|0.5|")

            parts = result_text.strip().split("|")
            if len(parts) >= 2:
                # The prompt shows the example category in double quotes, so
                # the model may echo quotes/backticks around its answer;
                # strip them so the lookup below still matches.
                sender_type_str = parts[0].strip().strip("\"'`").strip().lower()

                confidence = float(parts[1].strip())
                # float() accepts "nan"/"inf"; NaN would slip through the
                # min/max clamp unchanged, so treat non-finite as malformed.
                if not math.isfinite(confidence):
                    raise ValueError(f"non-finite confidence {parts[1]!r}")

                type_mapping = {
                    "kultusministerium": SenderType.KULTUSMINISTERIUM,
                    "landesschulbehoerde": SenderType.LANDESSCHULBEHOERDE,
                    "rlsb": SenderType.RLSB,
                    "schulamt": SenderType.SCHULAMT,
                    "nibis": SenderType.NIBIS,
                    "schultraeger": SenderType.SCHULTRAEGER,
                    "elternvertreter": SenderType.ELTERNVERTRETER,
                    "gewerkschaft": SenderType.GEWERKSCHAFT,
                    "fortbildungsinstitut": SenderType.FORTBILDUNGSINSTITUT,
                    "privatperson": SenderType.PRIVATPERSON,
                    "unternehmen": SenderType.UNTERNEHMEN,
                }

                # Anything not in the mapping (including "unbekannt") is
                # reported as UNBEKANNT.
                sender_type = type_mapping.get(sender_type_str, SenderType.UNBEKANNT)

                return SenderClassification(
                    sender_type=sender_type,
                    confidence=min(max(confidence, 0.0), 1.0),
                    domain_matched=False,
                    ai_classified=True,
                )

            # Reply did not contain at least "kategorie|konfidenz".
            logger.warning(
                "LLM sender classification returned an unparseable reply: %r",
                result_text[:200],
            )
        else:
            logger.warning(
                "LLM sender classification got HTTP status %s",
                response.status_code,
            )

    except Exception as e:
        # Deliberately broad: classification must never break mail handling.
        logger.warning("LLM sender classification failed: %s", e)

    # Default fallback
    return SenderClassification(
        sender_type=SenderType.UNBEKANNT,
        confidence=0.3,
        domain_matched=False,
        ai_classified=False,
    )
|
||||
Reference in New Issue
Block a user