[split-required] Split final 43 files (500-668 LOC) to complete refactoring

klausur-service (11 files):
- cv_gutter_repair, ocr_pipeline_regression, upload_api
- ocr_pipeline_sessions, smart_spell, nru_worksheet_generator
- ocr_pipeline_overlays, mail/aggregator, zeugnis_api
- cv_syllable_detect, self_rag

backend-lehrer (17 files):
- classroom_engine/suggestions, generators/quiz_generator
- worksheets_api, llm_gateway/comparison, state_engine_api
- classroom/models (→ 4 submodules), services/file_processor
- alerts_agent/api/wizard+digests+routes, content_generators/pdf
- classroom/routes/sessions, llm_gateway/inference
- classroom_engine/analytics, auth/keycloak_auth
- alerts_agent/processing/rule_engine, ai_processor/print_versions

agent-core (5 files):
- brain/memory_store, brain/knowledge_graph, brain/context_manager
- orchestrator/supervisor, sessions/session_manager

admin-lehrer (5 components):
- GridOverlay, StepGridReview, DevOpsPipelineSidebar
- DataFlowDiagram, sbom/wizard/page

website (2 files):
- DependencyMap, lehrer/abitur-archiv

Other: nibis_ingestion, grid_detection_service, export-doclayout-onnx

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Benjamin Admin
2026-04-25 09:41:42 +02:00
parent 451365a312
commit bd4b956e3c
113 changed files with 13790 additions and 14148 deletions

View File

@@ -0,0 +1,322 @@
"""
Mail Aggregator IMAP — IMAP connection, sync, email parsing.
Extracted from aggregator.py for modularity.
"""
import email
import logging
from typing import Optional, List, Dict, Any, Tuple
from datetime import datetime, timezone
from email.header import decode_header, make_header
from email.utils import parsedate_to_datetime, parseaddr
from .mail_db import upsert_email, update_account_status, get_email_account
logger = logging.getLogger(__name__)
class IMAPConnectionError(Exception):
    """Signals that connecting to, or syncing from, an IMAP server failed."""
class IMAPMixin:
    """IMAP-related methods for MailAggregator.

    Provides connection testing, folder syncing, and email parsing.
    Must be mixed into a class that has ``_credentials_service`` —
    it is used by ``sync_account`` to resolve the account password
    from the stored vault path.
    """
def _parse_folder_name(self, folder_response: bytes) -> str:
"""Parse folder name from IMAP LIST response."""
try:
# Format: '(\\HasNoChildren) "/" "INBOX"'
decoded = folder_response.decode("utf-8") if isinstance(folder_response, bytes) else folder_response
parts = decoded.rsplit('" "', 1)
if len(parts) == 2:
return parts[1].rstrip('"')
return decoded
except Exception:
return str(folder_response)
async def test_imap_connection(
self,
imap_host: str,
imap_port: int,
imap_ssl: bool,
email_address: str,
password: str,
) -> Tuple[bool, Optional[str], Optional[List[str]]]:
"""Test IMAP connection. Returns (success, error, folders)."""
try:
import imaplib
if imap_ssl:
imap = imaplib.IMAP4_SSL(imap_host, imap_port)
else:
imap = imaplib.IMAP4(imap_host, imap_port)
imap.login(email_address, password)
# List folders
folders_found = None
status, folders = imap.list()
if status == "OK":
folders_found = [
self._parse_folder_name(f) for f in folders if f
]
imap.logout()
return True, None, folders_found
except Exception as e:
logger.warning(f"IMAP test failed for {email_address}: {e}")
return False, f"IMAP Error: {str(e)}", None
async def sync_account(
self,
account_id: str,
user_id: str,
max_emails: int = 100,
folders: Optional[List[str]] = None,
) -> Tuple[int, int]:
"""
Sync emails from an IMAP account.
Args:
account_id: The account ID
user_id: The user ID
max_emails: Maximum emails to fetch
folders: Specific folders to sync (default: INBOX)
Returns:
Tuple of (new_emails, total_emails)
"""
import imaplib
account = await get_email_account(account_id, user_id)
if not account:
raise ValueError(f"Account not found: {account_id}")
# Get credentials
vault_path = account.get("vault_path", "")
creds = await self._credentials_service.get_credentials(account_id, vault_path)
if not creds:
await update_account_status(account_id, "error", "Credentials not found")
raise IMAPConnectionError("Credentials not found")
new_count = 0
total_count = 0
try:
# Connect to IMAP
if account["imap_ssl"]:
imap = imaplib.IMAP4_SSL(account["imap_host"], account["imap_port"])
else:
imap = imaplib.IMAP4(account["imap_host"], account["imap_port"])
imap.login(creds.email, creds.password)
# Sync specified folders or just INBOX
sync_folders = folders or ["INBOX"]
for folder in sync_folders:
try:
status, _ = imap.select(folder)
if status != "OK":
continue
# Search for recent emails
status, messages = imap.search(None, "ALL")
if status != "OK":
continue
message_ids = messages[0].split()
total_count += len(message_ids)
# Fetch most recent emails
recent_ids = message_ids[-max_emails:] if len(message_ids) > max_emails else message_ids
for msg_id in recent_ids:
try:
email_data = await self._fetch_and_store_email(
imap, msg_id, account_id, user_id, account["tenant_id"], folder
)
if email_data:
new_count += 1
except Exception as e:
logger.warning(f"Failed to fetch email {msg_id}: {e}")
except Exception as e:
logger.warning(f"Failed to sync folder {folder}: {e}")
imap.logout()
# Update account status
await update_account_status(
account_id,
"active",
email_count=total_count,
unread_count=new_count, # Will be recalculated
)
return new_count, total_count
except Exception as e:
logger.error(f"Account sync failed: {e}")
await update_account_status(account_id, "error", str(e))
raise IMAPConnectionError(str(e))
async def _fetch_and_store_email(
self,
imap,
msg_id: bytes,
account_id: str,
user_id: str,
tenant_id: str,
folder: str,
) -> Optional[str]:
"""Fetch a single email and store it in the database."""
try:
status, msg_data = imap.fetch(msg_id, "(RFC822)")
if status != "OK" or not msg_data or not msg_data[0]:
return None
raw_email = msg_data[0][1]
msg = email.message_from_bytes(raw_email)
# Parse headers
message_id = msg.get("Message-ID", str(msg_id))
subject = self._decode_header(msg.get("Subject", ""))
from_header = msg.get("From", "")
sender_name, sender_email = parseaddr(from_header)
sender_name = self._decode_header(sender_name)
# Parse recipients
to_header = msg.get("To", "")
recipients = [addr[1] for addr in email.utils.getaddresses([to_header])]
cc_header = msg.get("Cc", "")
cc = [addr[1] for addr in email.utils.getaddresses([cc_header])]
# Parse dates
date_str = msg.get("Date")
try:
date_sent = parsedate_to_datetime(date_str) if date_str else datetime.now(timezone.utc)
except Exception:
date_sent = datetime.now(timezone.utc)
date_received = datetime.now(timezone.utc)
# Parse body
body_text, body_html, attachments = self._parse_body(msg)
# Create preview
body_preview = (body_text[:200] + "...") if body_text and len(body_text) > 200 else body_text
# Get headers dict
headers = {k: self._decode_header(v) for k, v in msg.items() if k not in ["Body"]}
# Store in database
email_id = await upsert_email(
account_id=account_id,
user_id=user_id,
tenant_id=tenant_id,
message_id=message_id,
subject=subject,
sender_email=sender_email,
sender_name=sender_name,
recipients=recipients,
cc=cc,
body_preview=body_preview,
body_text=body_text,
body_html=body_html,
has_attachments=len(attachments) > 0,
attachments=attachments,
headers=headers,
folder=folder,
date_sent=date_sent,
date_received=date_received,
)
return email_id
except Exception as e:
logger.error(f"Failed to parse email: {e}")
return None
def _decode_header(self, header_value: str) -> str:
"""Decode email header value."""
if not header_value:
return ""
try:
decoded = decode_header(header_value)
return str(make_header(decoded))
except Exception:
return str(header_value)
def _parse_body(self, msg) -> Tuple[Optional[str], Optional[str], List[Dict]]:
"""
Parse email body and attachments.
Returns:
Tuple of (body_text, body_html, attachments)
"""
body_text = None
body_html = None
attachments = []
if msg.is_multipart():
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition", ""))
# Skip multipart containers
if content_type.startswith("multipart/"):
continue
# Check for attachments
if "attachment" in content_disposition:
filename = part.get_filename()
if filename:
attachments.append({
"filename": self._decode_header(filename),
"content_type": content_type,
"size": len(part.get_payload(decode=True) or b""),
})
continue
# Get body content
try:
payload = part.get_payload(decode=True)
charset = part.get_content_charset() or "utf-8"
if payload:
text = payload.decode(charset, errors="replace")
if content_type == "text/plain" and not body_text:
body_text = text
elif content_type == "text/html" and not body_html:
body_html = text
except Exception as e:
logger.debug(f"Failed to decode body part: {e}")
else:
# Single part message
content_type = msg.get_content_type()
try:
payload = msg.get_payload(decode=True)
charset = msg.get_content_charset() or "utf-8"
if payload:
text = payload.decode(charset, errors="replace")
if content_type == "text/plain":
body_text = text
elif content_type == "text/html":
body_html = text
except Exception as e:
logger.debug(f"Failed to decode body: {e}")
return body_text, body_html, attachments