[split-required] Split final 43 files (500-668 LOC) to complete refactoring
klausur-service (11 files): - cv_gutter_repair, ocr_pipeline_regression, upload_api - ocr_pipeline_sessions, smart_spell, nru_worksheet_generator - ocr_pipeline_overlays, mail/aggregator, zeugnis_api - cv_syllable_detect, self_rag backend-lehrer (17 files): - classroom_engine/suggestions, generators/quiz_generator - worksheets_api, llm_gateway/comparison, state_engine_api - classroom/models (→ 4 submodules), services/file_processor - alerts_agent/api/wizard+digests+routes, content_generators/pdf - classroom/routes/sessions, llm_gateway/inference - classroom_engine/analytics, auth/keycloak_auth - alerts_agent/processing/rule_engine, ai_processor/print_versions agent-core (5 files): - brain/memory_store, brain/knowledge_graph, brain/context_manager - orchestrator/supervisor, sessions/session_manager admin-lehrer (5 components): - GridOverlay, StepGridReview, DevOpsPipelineSidebar - DataFlowDiagram, sbom/wizard/page website (2 files): - DependencyMap, lehrer/abitur-archiv Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
322
klausur-service/backend/mail/aggregator_imap.py
Normal file
322
klausur-service/backend/mail/aggregator_imap.py
Normal file
@@ -0,0 +1,322 @@
|
||||
"""
|
||||
Mail Aggregator IMAP — IMAP connection, sync, email parsing.
|
||||
|
||||
Extracted from aggregator.py for modularity.
|
||||
"""
|
||||
|
||||
import email
|
||||
import logging
|
||||
from typing import Optional, List, Dict, Any, Tuple
|
||||
from datetime import datetime, timezone
|
||||
from email.header import decode_header, make_header
|
||||
from email.utils import parsedate_to_datetime, parseaddr
|
||||
|
||||
from .mail_db import upsert_email, update_account_status, get_email_account
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IMAPConnectionError(Exception):
    """Raised when an IMAP connection, login, or sync operation fails."""
|
||||
|
||||
|
||||
class IMAPMixin:
|
||||
"""IMAP-related methods for MailAggregator.
|
||||
|
||||
Provides connection testing, syncing, and email parsing.
|
||||
Must be mixed into a class that has ``_credentials_service``.
|
||||
"""
|
||||
|
||||
def _parse_folder_name(self, folder_response: bytes) -> str:
|
||||
"""Parse folder name from IMAP LIST response."""
|
||||
try:
|
||||
# Format: '(\\HasNoChildren) "/" "INBOX"'
|
||||
decoded = folder_response.decode("utf-8") if isinstance(folder_response, bytes) else folder_response
|
||||
parts = decoded.rsplit('" "', 1)
|
||||
if len(parts) == 2:
|
||||
return parts[1].rstrip('"')
|
||||
return decoded
|
||||
except Exception:
|
||||
return str(folder_response)
|
||||
|
||||
async def test_imap_connection(
|
||||
self,
|
||||
imap_host: str,
|
||||
imap_port: int,
|
||||
imap_ssl: bool,
|
||||
email_address: str,
|
||||
password: str,
|
||||
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
|
||||
"""Test IMAP connection. Returns (success, error, folders)."""
|
||||
try:
|
||||
import imaplib
|
||||
|
||||
if imap_ssl:
|
||||
imap = imaplib.IMAP4_SSL(imap_host, imap_port)
|
||||
else:
|
||||
imap = imaplib.IMAP4(imap_host, imap_port)
|
||||
|
||||
imap.login(email_address, password)
|
||||
|
||||
# List folders
|
||||
folders_found = None
|
||||
status, folders = imap.list()
|
||||
if status == "OK":
|
||||
folders_found = [
|
||||
self._parse_folder_name(f) for f in folders if f
|
||||
]
|
||||
|
||||
imap.logout()
|
||||
return True, None, folders_found
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"IMAP test failed for {email_address}: {e}")
|
||||
return False, f"IMAP Error: {str(e)}", None
|
||||
|
||||
async def sync_account(
|
||||
self,
|
||||
account_id: str,
|
||||
user_id: str,
|
||||
max_emails: int = 100,
|
||||
folders: Optional[List[str]] = None,
|
||||
) -> Tuple[int, int]:
|
||||
"""
|
||||
Sync emails from an IMAP account.
|
||||
|
||||
Args:
|
||||
account_id: The account ID
|
||||
user_id: The user ID
|
||||
max_emails: Maximum emails to fetch
|
||||
folders: Specific folders to sync (default: INBOX)
|
||||
|
||||
Returns:
|
||||
Tuple of (new_emails, total_emails)
|
||||
"""
|
||||
import imaplib
|
||||
|
||||
account = await get_email_account(account_id, user_id)
|
||||
if not account:
|
||||
raise ValueError(f"Account not found: {account_id}")
|
||||
|
||||
# Get credentials
|
||||
vault_path = account.get("vault_path", "")
|
||||
creds = await self._credentials_service.get_credentials(account_id, vault_path)
|
||||
if not creds:
|
||||
await update_account_status(account_id, "error", "Credentials not found")
|
||||
raise IMAPConnectionError("Credentials not found")
|
||||
|
||||
new_count = 0
|
||||
total_count = 0
|
||||
|
||||
try:
|
||||
# Connect to IMAP
|
||||
if account["imap_ssl"]:
|
||||
imap = imaplib.IMAP4_SSL(account["imap_host"], account["imap_port"])
|
||||
else:
|
||||
imap = imaplib.IMAP4(account["imap_host"], account["imap_port"])
|
||||
|
||||
imap.login(creds.email, creds.password)
|
||||
|
||||
# Sync specified folders or just INBOX
|
||||
sync_folders = folders or ["INBOX"]
|
||||
|
||||
for folder in sync_folders:
|
||||
try:
|
||||
status, _ = imap.select(folder)
|
||||
if status != "OK":
|
||||
continue
|
||||
|
||||
# Search for recent emails
|
||||
status, messages = imap.search(None, "ALL")
|
||||
if status != "OK":
|
||||
continue
|
||||
|
||||
message_ids = messages[0].split()
|
||||
total_count += len(message_ids)
|
||||
|
||||
# Fetch most recent emails
|
||||
recent_ids = message_ids[-max_emails:] if len(message_ids) > max_emails else message_ids
|
||||
|
||||
for msg_id in recent_ids:
|
||||
try:
|
||||
email_data = await self._fetch_and_store_email(
|
||||
imap, msg_id, account_id, user_id, account["tenant_id"], folder
|
||||
)
|
||||
if email_data:
|
||||
new_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch email {msg_id}: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to sync folder {folder}: {e}")
|
||||
|
||||
imap.logout()
|
||||
|
||||
# Update account status
|
||||
await update_account_status(
|
||||
account_id,
|
||||
"active",
|
||||
email_count=total_count,
|
||||
unread_count=new_count, # Will be recalculated
|
||||
)
|
||||
|
||||
return new_count, total_count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Account sync failed: {e}")
|
||||
await update_account_status(account_id, "error", str(e))
|
||||
raise IMAPConnectionError(str(e))
|
||||
|
||||
    async def _fetch_and_store_email(
        self,
        imap,
        msg_id: bytes,
        account_id: str,
        user_id: str,
        tenant_id: str,
        folder: str,
    ) -> Optional[str]:
        """Fetch a single email over IMAP and upsert it into the database.

        Args:
            imap: An authenticated imaplib connection with a folder selected.
            msg_id: Message sequence number from an IMAP SEARCH response.
            account_id: Owning mail account ID.
            user_id: Owning user ID.
            tenant_id: Tenant the account belongs to.
            folder: Name of the IMAP folder the message lives in.

        Returns:
            The stored email's ID, or None if fetching/parsing failed
            (errors are logged, never raised — callers count successes).
        """
        try:
            status, msg_data = imap.fetch(msg_id, "(RFC822)")
            if status != "OK" or not msg_data or not msg_data[0]:
                return None

            # msg_data[0] is a (envelope, raw-bytes) tuple for RFC822 fetches.
            raw_email = msg_data[0][1]
            msg = email.message_from_bytes(raw_email)

            # Parse headers.
            # NOTE(review): str(msg_id) on a bytes value yields e.g. "b'123'";
            # confirm that fallback Message-ID format is intended downstream.
            message_id = msg.get("Message-ID", str(msg_id))
            subject = self._decode_header(msg.get("Subject", ""))
            from_header = msg.get("From", "")
            sender_name, sender_email = parseaddr(from_header)
            # Display names may be RFC 2047-encoded; decode separately.
            sender_name = self._decode_header(sender_name)

            # Parse recipients (email.utils is reachable here because the
            # module-level `from email.utils import ...` imports the submodule).
            to_header = msg.get("To", "")
            recipients = [addr[1] for addr in email.utils.getaddresses([to_header])]

            cc_header = msg.get("Cc", "")
            cc = [addr[1] for addr in email.utils.getaddresses([cc_header])]

            # Parse dates; fall back to "now" (UTC) on a missing/bad Date header.
            date_str = msg.get("Date")
            try:
                date_sent = parsedate_to_datetime(date_str) if date_str else datetime.now(timezone.utc)
            except Exception:
                date_sent = datetime.now(timezone.utc)

            date_received = datetime.now(timezone.utc)

            # Parse body text/HTML and attachment metadata.
            body_text, body_html, attachments = self._parse_body(msg)

            # Create a short plain-text preview for list views.
            body_preview = (body_text[:200] + "...") if body_text and len(body_text) > 200 else body_text

            # Decode all headers into a plain dict for storage.
            # NOTE(review): a dict collapses repeated headers (e.g. Received)
            # to the last occurrence — confirm that is acceptable.
            headers = {k: self._decode_header(v) for k, v in msg.items() if k not in ["Body"]}

            # Store in database (insert or update keyed on the message identity).
            email_id = await upsert_email(
                account_id=account_id,
                user_id=user_id,
                tenant_id=tenant_id,
                message_id=message_id,
                subject=subject,
                sender_email=sender_email,
                sender_name=sender_name,
                recipients=recipients,
                cc=cc,
                body_preview=body_preview,
                body_text=body_text,
                body_html=body_html,
                has_attachments=len(attachments) > 0,
                attachments=attachments,
                headers=headers,
                folder=folder,
                date_sent=date_sent,
                date_received=date_received,
            )

            return email_id

        except Exception as e:
            # Best-effort: a single unparseable message must not abort the sync.
            logger.error(f"Failed to parse email: {e}")
            return None
|
||||
|
||||
def _decode_header(self, header_value: str) -> str:
|
||||
"""Decode email header value."""
|
||||
if not header_value:
|
||||
return ""
|
||||
try:
|
||||
decoded = decode_header(header_value)
|
||||
return str(make_header(decoded))
|
||||
except Exception:
|
||||
return str(header_value)
|
||||
|
||||
def _parse_body(self, msg) -> Tuple[Optional[str], Optional[str], List[Dict]]:
|
||||
"""
|
||||
Parse email body and attachments.
|
||||
|
||||
Returns:
|
||||
Tuple of (body_text, body_html, attachments)
|
||||
"""
|
||||
body_text = None
|
||||
body_html = None
|
||||
attachments = []
|
||||
|
||||
if msg.is_multipart():
|
||||
for part in msg.walk():
|
||||
content_type = part.get_content_type()
|
||||
content_disposition = str(part.get("Content-Disposition", ""))
|
||||
|
||||
# Skip multipart containers
|
||||
if content_type.startswith("multipart/"):
|
||||
continue
|
||||
|
||||
# Check for attachments
|
||||
if "attachment" in content_disposition:
|
||||
filename = part.get_filename()
|
||||
if filename:
|
||||
attachments.append({
|
||||
"filename": self._decode_header(filename),
|
||||
"content_type": content_type,
|
||||
"size": len(part.get_payload(decode=True) or b""),
|
||||
})
|
||||
continue
|
||||
|
||||
# Get body content
|
||||
try:
|
||||
payload = part.get_payload(decode=True)
|
||||
charset = part.get_content_charset() or "utf-8"
|
||||
|
||||
if payload:
|
||||
text = payload.decode(charset, errors="replace")
|
||||
|
||||
if content_type == "text/plain" and not body_text:
|
||||
body_text = text
|
||||
elif content_type == "text/html" and not body_html:
|
||||
body_html = text
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to decode body part: {e}")
|
||||
|
||||
else:
|
||||
# Single part message
|
||||
content_type = msg.get_content_type()
|
||||
try:
|
||||
payload = msg.get_payload(decode=True)
|
||||
charset = msg.get_content_charset() or "utf-8"
|
||||
|
||||
if payload:
|
||||
text = payload.decode(charset, errors="replace")
|
||||
|
||||
if content_type == "text/plain":
|
||||
body_text = text
|
||||
elif content_type == "text/html":
|
||||
body_html = text
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to decode body: {e}")
|
||||
|
||||
return body_text, body_html, attachments
|
||||
Reference in New Issue
Block a user