Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 142 additions & 20 deletions init/aws-nitro/device/net_tap_afvsock.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,64 @@
#define TUN_DEV_MAJOR 10
#define TUN_DEV_MINOR 200

/*
* The Extended Ethernet Frame header is 14 bytes, representing the Destination
* Address (6 bytes), Source Address (6 bytes) and the Ethertype (2 bytes).
*/
#define ETH_HEADER_LEN 14

#define PROXY_HEADER_LEN 4

/*
 * Fill buf with exactly n bytes from fd, looping over short reads.
 *
 * Return value:
 *   n  - all n bytes were read
 *   0  - EOF was reached before any byte was read (also the result for n == 0)
 *   -1 - read() failed with an error other than EINTR (errno left as set by
 *        read()), or EOF was hit after only part of the data had arrived
 *        (errno set to EIO)
 */
static ssize_t read_exact(int fd, void *buf, size_t n)
{
    char *cursor = buf;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t got = read(fd, cursor, remaining);

        if (got > 0) {
            cursor += got;
            remaining -= (size_t)got;
            continue;
        }

        if (got == 0) {
            /* EOF: clean only if nothing has been consumed so far. */
            if (remaining == n)
                return 0;

            errno = EIO;
            return -1;
        }

        /* got < 0: an interrupted syscall is retried, anything else is fatal. */
        if (errno != EINTR)
            return -1;
    }

    return (ssize_t)n;
}

/*
 * Push exactly n bytes from buf to fd, looping over short writes.
 *
 * Return value:
 *   n  - all n bytes were written
 *   -1 - write() failed with an error other than EINTR (errno left as set by
 *        write()), or write() reported zero bytes written (errno set to EIO)
 */
static ssize_t write_all(int fd, const void *buf, size_t n)
{
    const char *cursor = buf;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t put = write(fd, cursor, remaining);

        if (put > 0) {
            cursor += put;
            remaining -= (size_t)put;
            continue;
        }

        if (put == 0) {
            /* No progress and no error reported: surface it as an I/O error. */
            errno = EIO;
            return -1;
        }

        /* put < 0: an interrupted syscall is retried, anything else is fatal. */
        if (errno != EINTR)
            return -1;
    }

    return (ssize_t)n;
}

/*
* Forward ethernet packets to/from the host vsock providing network access and
* the guest TAP device routing application network traffic.
Expand All @@ -53,7 +111,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
if (sock_fd < 0) {
perror("creating INET socket to get TAP MTU");
return -errno;
return -1;
}

memset(&ifr, 0, sizeof(struct ifreq));
Expand All @@ -63,22 +121,29 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
if (ret < 0) {
close(sock_fd);
perror("fetch MTU of TAP device");
exit(-errno);
return -1;
}

close(sock_fd);

buf = (unsigned char *)malloc(ifr.ifr_mtu);
uint32_t eth_frame_size = ifr.ifr_mtu + ETH_HEADER_LEN;
buf = (unsigned char *)malloc(eth_frame_size);
if (buf == NULL) {
perror("allocate buffer for TAP/vsock communication");
exit(-1);
return -1;
}

// Forward the MTU to the host for it to allocate a corresponding buffer.
ret = write(vsock_fd, (void *)&ifr.ifr_mtu, sizeof(int));
if (ret < sizeof(int)) {
perror("write TAP device MTU to host");
exit(-errno);
// Forward the max ethernet frame size to the host for it to allocate a
// corresponding buffer.

// To avoid issues when the host and enclave endianness differ, convert the
// max ethernet frame size to big endian (network byte order) before sending
// it to the host.
uint32_t eth_frame_size_be = htonl(eth_frame_size);
if (write_all(vsock_fd, &eth_frame_size_be, sizeof(eth_frame_size)) < 0) {
perror("write max ethernet frame size to host");
free(buf);
return -1;
}

pfds[0].fd = vsock_fd;
Expand All @@ -97,25 +162,81 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
event_found = false;
// Event on vsock. Read the frame and write it to the TAP device.
if (pfds[0].revents & POLLIN) {
nread = read(vsock_fd, &sz, 4);
if (nread != 4)
exit(0);
nread = read_exact(vsock_fd, &sz, PROXY_HEADER_LEN);
if (nread == 0) {
// vsock connection closed cleanly
break;
} else if (nread < 0) {
perror("unable to read the proxy header from vsock");
free(buf);
return -1;
}

unsigned int len = htonl(sz);
unsigned int len = ntohl(sz);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to the sz variable, len should be defined as uint32_t to match the expected return type of ntohl and ensure consistent behavior when comparing against eth_frame_size. This ensures the code is explicit and portable, aligning with best practices for network protocol implementation.

            uint32_t len = ntohl(sz);
References
  1. Prioritize code readability and focused commits over micro-optimizations. Refactoring for minor efficiency gains may be rejected if it harms readability or expands the scope of a change.

if (len > eth_frame_size) {
fprintf(stderr,
"ethernet frame size %u exceeds MTU + header size %u\n",
len, eth_frame_size);
free(buf);
return -1;
}

nread = read_exact(vsock_fd, buf, len);
if (nread != (ssize_t)len) {
if (nread == 0)
errno = EIO;

perror("failed to read the ethernet frame from vsock");
free(buf);
return -1;
}

nread = read(vsock_fd, buf, len);
write(tun_fd, buf, nread);
// TAP devices are expected to write an entire frame at once and not
// do partial writes. Only retry if the syscall is interrupted.
ssize_t bytes_written = 0;
do {
bytes_written = write(tun_fd, buf, nread);
} while (bytes_written < 0 && errno == EINTR);

if (bytes_written != nread) {
// the entire frame wasn't written
if (bytes_written >= 0)
errno = EIO;

perror("unable to write the ethernet frame to the TAP device");
free(buf);
return -1;
}

event_found = true;
}

// Event on the TAP device. Read the frame and write it to the vsock.
if (pfds[1].revents & POLLIN) {
nread = read(tun_fd, buf, ifr.ifr_mtu);
if (nread > 0) {
sz = htonl(nread);
write(vsock_fd, (void *)&sz, 4);
write(vsock_fd, buf, nread);
// TAP devices are expected to read an entire frame at once and not
// do partial reads. Only retry if the syscall is interrupted.
do {
nread = read(tun_fd, buf, eth_frame_size);
} while (nread < 0 && errno == EINTR);
if (nread <= 0) {
if (nread == 0)
errno = EIO;

perror("failed to read the ethernet frame from the TAP device");
free(buf);
return -1;
}

sz = htonl((uint32_t)nread);
if (write_all(vsock_fd, (void *)&sz, PROXY_HEADER_LEN) < 0) {
perror("unable to write the proxy header to vsock");
free(buf);
return -1;
}
if (write_all(vsock_fd, buf, nread) < 0) {
perror("unable to write the ethernet frame to vsock");
free(buf);
return -1;
}

event_found = true;
Expand All @@ -132,6 +253,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
break;
}

free(buf);
close(vsock_fd);
close(tun_fd);

Expand Down
6 changes: 3 additions & 3 deletions src/aws_nitro/src/enclave/proxy/proxies/net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,14 @@ impl DeviceProxy for NetProxy {
let (mut vsock, _) = listener.accept().map_err(Error::VsockAccept)?;

/*
* Upon initial connection, read the MTU size from the enclave and allocate the buffer
* Upon initial connection, read the MTU size + ethernet frame header from the enclave and allocate the buffer
* accordingly.
*/
let size = {
let mut size_buf = [0u8; size_of::<u32>()];
let _ = vsock.read(&mut size_buf).map_err(Error::VsockRead)?;
vsock.read_exact(&mut size_buf).map_err(Error::VsockRead)?;

u32::from_ne_bytes(size_buf)
u32::from_be_bytes(size_buf)
};

self.buf
Expand Down
Loading