diff --git a/init/aws-nitro/device/net_tap_afvsock.c b/init/aws-nitro/device/net_tap_afvsock.c index e25a9a5fe..2c648a16f 100644 --- a/init/aws-nitro/device/net_tap_afvsock.c +++ b/init/aws-nitro/device/net_tap_afvsock.c @@ -31,6 +31,64 @@ #define TUN_DEV_MAJOR 10 #define TUN_DEV_MINOR 200 +/* + * The Extended Ethernet Frame header is 14 bytes, representing the Destination + * Address (6 bytes), Source Address (6 bytes) and the Ethertype (2 bytes). + */ +#define ETH_HEADER_LEN 14 + +#define PROXY_HEADER_LEN 4 + +/* + * Read exactly n bytes into the buffer, retrying on partial reads. + * Returns n on success, 0 on clean EOF, or -1 on error. + */ +static ssize_t read_exact(int fd, void *buf, size_t n) +{ + size_t total = 0; + + while (total < n) { + ssize_t r = read(fd, (char *)buf + total, n - total); + if (r < 0) { + if (errno == EINTR) + continue; + return -1; + } else if (r == 0) { + if (total > 0) { + errno = EIO; + return -1; + } + return 0; + } + total += r; + } + return (ssize_t)total; +} + +/* + * Write exactly n bytes from the buffer to the fd, retrying on partial writes. + * Returns n on success, or -1 on error. + */ +static ssize_t write_all(int fd, const void *buf, size_t n) +{ + size_t total = 0; + + while (total < n) { + ssize_t w = write(fd, (const char *)buf + total, n - total); + if (w <= 0) { + if (w < 0 && errno == EINTR) + continue; + + if (w == 0) + errno = EIO; + + return -1; + } + total += w; + } + return (ssize_t)total; +} + /* * Forward ethernet packets to/from the host vsock providing network access and * the guest TAP device routing application network traffic. @@ -53,7 +111,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, sock_fd = socket(AF_INET, SOCK_DGRAM, 0); if (sock_fd < 0) { perror("creating INET socket to get TAP MTU"); - return -errno; + return -1; } memset(&ifr, 0, sizeof(struct ifreq)); @@ -63,22 +121,29 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, if (ret < 0) { close(sock_fd); perror("fetch MTU of TAP device"); - exit(-errno); + return -1; } close(sock_fd); - buf = (unsigned char *)malloc(ifr.ifr_mtu); + uint32_t eth_frame_size = ifr.ifr_mtu + ETH_HEADER_LEN; + buf = (unsigned char *)malloc(eth_frame_size); if (buf == NULL) { perror("allocate buffer for TAP/vsock communication"); - exit(-1); + return -1; } - // Forward the MTU to the host for it to allocate a corresponding buffer. - ret = write(vsock_fd, (void *)&ifr.ifr_mtu, sizeof(int)); - if (ret < sizeof(int)) { - perror("write TAP device MTU to host"); - exit(-errno); + // Forward the max ethernet frame size to the host for it to allocate a + // corresponding buffer. + + // To avoid issues where the host endianness and the enclave endianness is + // different, convert to big endian to pass the max ethernet frame size to + // the host. + uint32_t eth_frame_size_be = htonl(eth_frame_size); + if (write_all(vsock_fd, ð_frame_size_be, sizeof(eth_frame_size)) < 0) { + perror("write max ethernet frame size to host"); + free(buf); + return -1; } pfds[0].fd = vsock_fd; @@ -97,25 +162,81 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, event_found = false; // Event on vsock. Read the frame and write it to the TAP device. if (pfds[0].revents & POLLIN) { - nread = read(vsock_fd, &sz, 4); - if (nread != 4) - exit(0); + nread = read_exact(vsock_fd, &sz, PROXY_HEADER_LEN); + if (nread == 0) { + // vsock connection closed cleanly + break; + } else if (nread < 0) { + perror("unable to read the proxy header from vsock"); + free(buf); + return -1; + } - unsigned int len = htonl(sz); + unsigned int len = ntohl(sz); + if (len > eth_frame_size) { + fprintf(stderr, + "ethernet frame size %u exceeds MTU + header size %u\n", + len, eth_frame_size); + free(buf); + return -1; + } + + nread = read_exact(vsock_fd, buf, len); + if (nread != (ssize_t)len) { + if (nread == 0) + errno = EIO; + + perror("failed to read the ethernet frame from vsock"); + free(buf); + return -1; + } - nread = read(vsock_fd, buf, len); - write(tun_fd, buf, nread); + // TAP devices are expected to write an entire frame at once and not + // do partial writes. Only retry if the syscall is interrupted. + ssize_t bytes_written = 0; + do { + bytes_written = write(tun_fd, buf, nread); + } while (bytes_written < 0 && errno == EINTR); + + if (bytes_written != nread) { + // the entire frame wasn't written + if (bytes_written >= 0) + errno = EIO; + + perror("unable to write the ethernet frame to the TAP device"); + free(buf); + return -1; + } event_found = true; } // Event on the TAP device. Read the frame and write it to the vsock. if (pfds[1].revents & POLLIN) { - nread = read(tun_fd, buf, ifr.ifr_mtu); - if (nread > 0) { - sz = htonl(nread); - write(vsock_fd, (void *)&sz, 4); - write(vsock_fd, buf, nread); + // TAP devices are expected to read an entire frame at once and not + // do partial reads. Only retry if the syscall is interrupted. + do { + nread = read(tun_fd, buf, eth_frame_size); + } while (nread < 0 && errno == EINTR); + if (nread <= 0) { + if (nread == 0) + errno = EIO; + + perror("failed to read the ethernet frame from the TAP device"); + free(buf); + return -1; + } + + sz = htonl((uint32_t)nread); + if (write_all(vsock_fd, (void *)&sz, PROXY_HEADER_LEN) < 0) { + perror("unable to write the proxy header to vsock"); + free(buf); + return -1; + } + if (write_all(vsock_fd, buf, nread) < 0) { + perror("unable to write the ethernet frame to vsock"); + free(buf); + return -1; } event_found = true; @@ -132,6 +253,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, break; } + free(buf); close(vsock_fd); close(tun_fd); diff --git a/src/aws_nitro/src/enclave/proxy/proxies/net.rs b/src/aws_nitro/src/enclave/proxy/proxies/net.rs index ef98570ea..5dbbbe338 100644 --- a/src/aws_nitro/src/enclave/proxy/proxies/net.rs +++ b/src/aws_nitro/src/enclave/proxy/proxies/net.rs @@ -95,14 +95,14 @@ impl DeviceProxy for NetProxy { let (mut vsock, _) = listener.accept().map_err(Error::VsockAccept)?; /* - * Upon initial connection, read the MTU size from the enclave and allocate the buffer + * Upon initial connection, read the MTU size + ethernet frame header from the enclave and allocate the buffer * accordingly. */ let size = { let mut size_buf = [0u8; size_of::()]; - let _ = vsock.read(&mut size_buf).map_err(Error::VsockRead)?; + vsock.read_exact(&mut size_buf).map_err(Error::VsockRead)?; - u32::from_ne_bytes(size_buf) + u32::from_be_bytes(size_buf) }; self.buf