Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 52 additions & 23 deletions src/fast_mail_parser.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
//! PyO3 binding layer for the `fast_mail_parser` extension module.
//!
//! The crate intentionally keeps two parallel data models:
//!
//! - [`mail_parser`] is a **PyO3-free core**: `Mail`/`Attachment` are plain Rust
//! types that hold the parsed message. Because they have no Python dependency,
//! the parsing logic can be exercised and tested independently of any Python
//! runtime.
//! - This module is the **PyO3 binding layer**: [`PyMail`]/[`PyAttachment`] wrap
//! the core types and expose them to Python, converting Rust values into Python
//! objects (e.g. `Vec<u8>` -> `bytes`).
//!
//! Keeping the split decouples the parsing logic from the Python bindings: the
//! core stays portable and unit-testable, while everything PyO3-specific lives
//! here.

mod mail_parser;

use pyo3::prelude::*;
Expand Down Expand Up @@ -35,6 +51,10 @@ impl PyAttachment {
}
}

/// A parsed email message exposed to Python.
///
/// Note that [`attachments`](Self::attachments) is **not** limited to file
/// attachments: it contains every node of the message's MIME tree.
#[pyclass]
pub struct PyMail {
#[pyo3(get)]
Expand All @@ -45,6 +65,13 @@ pub struct PyMail {
pub text_html: Vec<String>,
#[pyo3(get)]
pub date: String,
/// Every node of the message's MIME tree, not just file attachments.
///
/// This includes the text parts (`text/plain`, `text/html`, whose decoded
/// bodies also appear in `text_plain`/`text_html`) and the multipart
/// container nodes themselves (e.g. `multipart/mixed`, `multipart/alternative`).
/// Container nodes carry their MIME type but have empty `content`. A part is
/// only a real file attachment when its `filename` is non-empty.
#[pyo3(get)]
pub attachments: Vec<PyAttachment>,
#[pyo3(get)]
Expand All @@ -68,36 +95,38 @@ impl PyMail {
}
}

trait PyToBytes {
fn to_bytes(&self, py: Python<'_>) -> PyResult<Vec<u8>>;
}

impl PyToBytes for Py<PyAny> {
fn to_bytes(&self, py: Python<'_>) -> PyResult<Vec<u8>> {
let obj = self.bind(py);

if let Ok(bytes) = obj.cast::<PyBytes>() {
return Ok(bytes.as_bytes().to_vec());
}
/// Interpret a Python object as a byte buffer for parsing.
///
/// Accepts `bytes` (used as-is) or `str` (encoded to its UTF-8 bytes; ASCII is
/// unchanged because ASCII == its own UTF-8, and non-ASCII code points round-trip
/// correctly instead of being truncated to their low byte). Any other type, or a
/// `str` whose UTF-8 conversion fails, raises Python `TypeError`.
fn payload_to_bytes(payload: &Py<PyAny>, py: Python<'_>) -> PyResult<Vec<u8>> {
let obj = payload.bind(py);

if let Ok(bytes) = obj.cast::<PyBytes>() {
return Ok(bytes.as_bytes().to_vec());
}

if let Ok(text) = obj.cast::<PyString>() {
if let Ok(text) = text.to_str() {
// Encode str losslessly to its UTF-8 bytes. ASCII is unchanged
// (ASCII == its own UTF-8); non-ASCII code points now round-trip
// correctly instead of being truncated to their low byte.
return Ok(text.as_bytes().to_vec());
}
if let Ok(text) = obj.cast::<PyString>() {
if let Ok(text) = text.to_str() {
return Ok(text.as_bytes().to_vec());
}

Err(PyErr::new::<exceptions::PyTypeError, _>(
"The argument cannot be interpreted as bytes.",
))
}

Err(PyErr::new::<exceptions::PyTypeError, _>(
"The argument cannot be interpreted as bytes.",
))
}

/// Parse a raw email (`bytes` or `str`) into a [`PyMail`].
///
/// The resulting `PyMail.attachments` lists every node of the MIME tree -- text
/// parts and the multipart container nodes -- not only file attachments; see
/// [`PyMail::attachments`] for details.
#[pyfunction]
pub fn parse_email(py: Python<'_>, payload: Py<PyAny>) -> PyResult<PyMail> {
let message = payload.to_bytes(py)?;
let message = payload_to_bytes(&payload, py)?;

mail_parser::parse_email(message.as_slice())
.map_err(|e| ParseError::new_err(format!("Message parsing error: {}", e)))
Expand Down
12 changes: 12 additions & 0 deletions src/mail_parser.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
//! PyO3-free parsing core for `fast_mail_parser`.
//!
//! This module holds the pure-Rust data model -- [`Mail`] and [`Attachment`] --
//! and the logic that turns a raw message into them. It has no dependency on
//! Python or PyO3, so it can be exercised and unit-tested independently of any
//! Python runtime.
//!
//! The companion `fast_mail_parser` module is the **PyO3 binding layer**:
//! `PyMail`/`PyAttachment` wrap these core types and convert them into Python
//! objects. Keeping the two models separate decouples the parsing logic from the
//! Python bindings.

use mailparse::*;
use std::collections::HashMap;

Expand Down