klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
323 lines
11 KiB
Python
323 lines
11 KiB
Python
"""
|
|
Mail Aggregator IMAP — IMAP connection, sync, email parsing.
|
|
|
|
Extracted from aggregator.py for modularity.
|
|
"""
|
|
|
|
import email
|
|
import logging
|
|
from typing import Optional, List, Dict, Any, Tuple
|
|
from datetime import datetime, timezone
|
|
from email.header import decode_header, make_header
|
|
from email.utils import parsedate_to_datetime, parseaddr
|
|
|
|
from .mail_db import upsert_email, update_account_status, get_email_account
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class IMAPConnectionError(Exception):
    """Raised when an IMAP connection or sync operation fails."""
|
|
|
|
|
|
class IMAPMixin:
|
|
"""IMAP-related methods for MailAggregator.
|
|
|
|
Provides connection testing, syncing, and email parsing.
|
|
Must be mixed into a class that has ``_credentials_service``.
|
|
"""
|
|
|
|
def _parse_folder_name(self, folder_response: bytes) -> str:
|
|
"""Parse folder name from IMAP LIST response."""
|
|
try:
|
|
# Format: '(\\HasNoChildren) "/" "INBOX"'
|
|
decoded = folder_response.decode("utf-8") if isinstance(folder_response, bytes) else folder_response
|
|
parts = decoded.rsplit('" "', 1)
|
|
if len(parts) == 2:
|
|
return parts[1].rstrip('"')
|
|
return decoded
|
|
except Exception:
|
|
return str(folder_response)
|
|
|
|
async def test_imap_connection(
    self,
    imap_host: str,
    imap_port: int,
    imap_ssl: bool,
    email_address: str,
    password: str,
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
    """Test an IMAP connection and list the account's folders.

    Args:
        imap_host: IMAP server hostname.
        imap_port: IMAP server port.
        imap_ssl: Whether to connect with SSL/TLS.
        email_address: Login username (email address).
        password: Login password.

    Returns:
        Tuple of (success, error_message, folder_names). On success the
        error is None; on failure the folder list is None.
    """
    try:
        import imaplib

        if imap_ssl:
            imap = imaplib.IMAP4_SSL(imap_host, imap_port)
        else:
            imap = imaplib.IMAP4(imap_host, imap_port)

        try:
            imap.login(email_address, password)

            # List folders (best-effort: a failed LIST still counts as
            # a successful connection test).
            folders_found = None
            status, folders = imap.list()
            if status == "OK":
                folders_found = [
                    self._parse_folder_name(f) for f in folders if f
                ]
        finally:
            # FIX: always release the connection — the previous version
            # leaked the socket when login()/list() raised.
            try:
                imap.logout()
            except Exception:
                pass

        return True, None, folders_found

    except Exception as e:
        logger.warning(f"IMAP test failed for {email_address}: {e}")
        return False, f"IMAP Error: {str(e)}", None
|
|
|
|
async def sync_account(
    self,
    account_id: str,
    user_id: str,
    max_emails: int = 100,
    folders: Optional[List[str]] = None,
) -> Tuple[int, int]:
    """
    Sync emails from an IMAP account.

    Args:
        account_id: The account ID
        user_id: The user ID
        max_emails: Maximum emails to fetch per folder
        folders: Specific folders to sync (default: INBOX)

    Returns:
        Tuple of (new_emails, total_emails)

    Raises:
        ValueError: If the account does not exist.
        IMAPConnectionError: If credentials are missing or the sync fails.
    """
    import imaplib

    account = await get_email_account(account_id, user_id)
    if not account:
        raise ValueError(f"Account not found: {account_id}")

    # Get credentials from the vault-backed credentials service
    vault_path = account.get("vault_path", "")
    creds = await self._credentials_service.get_credentials(account_id, vault_path)
    if not creds:
        await update_account_status(account_id, "error", "Credentials not found")
        raise IMAPConnectionError("Credentials not found")

    new_count = 0
    total_count = 0
    imap = None

    try:
        # Connect to IMAP
        if account["imap_ssl"]:
            imap = imaplib.IMAP4_SSL(account["imap_host"], account["imap_port"])
        else:
            imap = imaplib.IMAP4(account["imap_host"], account["imap_port"])

        imap.login(creds.email, creds.password)

        # Sync specified folders or just INBOX
        sync_folders = folders or ["INBOX"]

        for folder in sync_folders:
            try:
                status, _ = imap.select(folder)
                if status != "OK":
                    continue

                # Full message-id list for the folder (counts everything)
                status, messages = imap.search(None, "ALL")
                if status != "OK":
                    continue

                message_ids = messages[0].split()
                total_count += len(message_ids)

                # Fetch only the most recent max_emails messages
                recent_ids = message_ids[-max_emails:] if len(message_ids) > max_emails else message_ids

                for msg_id in recent_ids:
                    try:
                        email_data = await self._fetch_and_store_email(
                            imap, msg_id, account_id, user_id, account["tenant_id"], folder
                        )
                        if email_data:
                            new_count += 1
                    except Exception as e:
                        # One bad message must not abort the folder sync
                        logger.warning(f"Failed to fetch email {msg_id}: {e}")

            except Exception as e:
                # One bad folder must not abort the account sync
                logger.warning(f"Failed to sync folder {folder}: {e}")

        # Update account status
        await update_account_status(
            account_id,
            "active",
            email_count=total_count,
            unread_count=new_count,  # Will be recalculated
        )

        return new_count, total_count

    except Exception as e:
        logger.error(f"Account sync failed: {e}")
        await update_account_status(account_id, "error", str(e))
        # FIX: chain the cause so the original traceback survives
        raise IMAPConnectionError(str(e)) from e
    finally:
        # FIX: always close the connection — the previous version only
        # logged out on the fully-successful path and leaked the socket
        # whenever an exception escaped mid-sync.
        if imap is not None:
            try:
                imap.logout()
            except Exception:
                pass
|
|
|
|
async def _fetch_and_store_email(
    self,
    imap,
    msg_id: bytes,
    account_id: str,
    user_id: str,
    tenant_id: str,
    folder: str,
) -> Optional[str]:
    """Fetch a single email via IMAP and store it in the database.

    Args:
        imap: Authenticated imaplib connection with a folder selected.
        msg_id: IMAP message id (bytes, as returned by SEARCH).
        account_id: Owning account ID.
        user_id: Owning user ID.
        tenant_id: Tenant the account belongs to.
        folder: Folder the message was fetched from.

    Returns:
        The stored email's database ID, or None if fetch/parse failed.
    """
    try:
        status, msg_data = imap.fetch(msg_id, "(RFC822)")
        if status != "OK" or not msg_data or not msg_data[0]:
            return None

        raw_email = msg_data[0][1]
        msg = email.message_from_bytes(raw_email)

        # Parse headers. When the message has no Message-ID, fall back to
        # the IMAP id — decoded, so we store e.g. "42" instead of the
        # bytes repr "b'42'" that str(msg_id) produced before.
        if isinstance(msg_id, bytes):
            fallback_id = msg_id.decode("ascii", errors="replace")
        else:
            fallback_id = str(msg_id)
        message_id = msg.get("Message-ID", fallback_id)
        subject = self._decode_header(msg.get("Subject", ""))
        from_header = msg.get("From", "")
        sender_name, sender_email = parseaddr(from_header)
        sender_name = self._decode_header(sender_name)

        # Parse recipients (To/Cc may each hold multiple addresses)
        to_header = msg.get("To", "")
        recipients = [addr[1] for addr in email.utils.getaddresses([to_header])]

        cc_header = msg.get("Cc", "")
        cc = [addr[1] for addr in email.utils.getaddresses([cc_header])]

        # Parse dates; fall back to "now" (UTC) for missing/invalid Date headers
        date_str = msg.get("Date")
        try:
            date_sent = parsedate_to_datetime(date_str) if date_str else datetime.now(timezone.utc)
        except Exception:
            date_sent = datetime.now(timezone.utc)

        date_received = datetime.now(timezone.utc)

        # Parse body and attachment metadata
        body_text, body_html, attachments = self._parse_body(msg)

        # Short plain-text preview for list views
        body_preview = (body_text[:200] + "...") if body_text and len(body_text) > 200 else body_text

        # Decoded headers dict (duplicate header names: last one wins)
        headers = {k: self._decode_header(v) for k, v in msg.items() if k not in ["Body"]}

        # Store in database
        email_id = await upsert_email(
            account_id=account_id,
            user_id=user_id,
            tenant_id=tenant_id,
            message_id=message_id,
            subject=subject,
            sender_email=sender_email,
            sender_name=sender_name,
            recipients=recipients,
            cc=cc,
            body_preview=body_preview,
            body_text=body_text,
            body_html=body_html,
            has_attachments=len(attachments) > 0,
            attachments=attachments,
            headers=headers,
            folder=folder,
            date_sent=date_sent,
            date_received=date_received,
        )

        return email_id

    except Exception as e:
        logger.error(f"Failed to parse email: {e}")
        return None
|
|
|
|
def _decode_header(self, header_value: str) -> str:
|
|
"""Decode email header value."""
|
|
if not header_value:
|
|
return ""
|
|
try:
|
|
decoded = decode_header(header_value)
|
|
return str(make_header(decoded))
|
|
except Exception:
|
|
return str(header_value)
|
|
|
|
def _parse_body(self, msg) -> Tuple[Optional[str], Optional[str], List[Dict]]:
|
|
"""
|
|
Parse email body and attachments.
|
|
|
|
Returns:
|
|
Tuple of (body_text, body_html, attachments)
|
|
"""
|
|
body_text = None
|
|
body_html = None
|
|
attachments = []
|
|
|
|
if msg.is_multipart():
|
|
for part in msg.walk():
|
|
content_type = part.get_content_type()
|
|
content_disposition = str(part.get("Content-Disposition", ""))
|
|
|
|
# Skip multipart containers
|
|
if content_type.startswith("multipart/"):
|
|
continue
|
|
|
|
# Check for attachments
|
|
if "attachment" in content_disposition:
|
|
filename = part.get_filename()
|
|
if filename:
|
|
attachments.append({
|
|
"filename": self._decode_header(filename),
|
|
"content_type": content_type,
|
|
"size": len(part.get_payload(decode=True) or b""),
|
|
})
|
|
continue
|
|
|
|
# Get body content
|
|
try:
|
|
payload = part.get_payload(decode=True)
|
|
charset = part.get_content_charset() or "utf-8"
|
|
|
|
if payload:
|
|
text = payload.decode(charset, errors="replace")
|
|
|
|
if content_type == "text/plain" and not body_text:
|
|
body_text = text
|
|
elif content_type == "text/html" and not body_html:
|
|
body_html = text
|
|
except Exception as e:
|
|
logger.debug(f"Failed to decode body part: {e}")
|
|
|
|
else:
|
|
# Single part message
|
|
content_type = msg.get_content_type()
|
|
try:
|
|
payload = msg.get_payload(decode=True)
|
|
charset = msg.get_content_charset() or "utf-8"
|
|
|
|
if payload:
|
|
text = payload.decode(charset, errors="replace")
|
|
|
|
if content_type == "text/plain":
|
|
body_text = text
|
|
elif content_type == "text/html":
|
|
body_html = text
|
|
except Exception as e:
|
|
logger.debug(f"Failed to decode body: {e}")
|
|
|
|
return body_text, body_html, attachments
|