Skip to content

Commit 396a9f5

Browse files
committed
init/aws-nitro: support partial read/write ethernet packet forwarding
Ensure the total requested number of bytes is forwarded to/from the host vsock providing the network access and the guest TAP device routing the application network traffic. To write this code I used Cursor and the claude-4.6-opus-high model. Signed-off-by: Jake Correnti <jakecorrenti+github@proton.me>
1 parent 7c5292c commit 396a9f5

2 files changed

Lines changed: 139 additions & 22 deletions

File tree

init/aws-nitro/device/net_tap_afvsock.c

Lines changed: 137 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,64 @@
3131
#define TUN_DEV_MAJOR 10
3232
#define TUN_DEV_MINOR 200
3333

34+
/*
 * The Ethernet II frame header is 14 bytes: the Destination Address (6 bytes),
 * the Source Address (6 bytes) and the EtherType (2 bytes).
 */
38+
#define ETH_HEADER_LEN 14
39+
40+
#define PROXY_HEADER_LEN 4
41+
42+
/*
 * Read exactly n bytes from fd into buf, looping over short reads.
 *
 * A read() that fails with EINTR is retried; any other failure aborts the
 * transfer and leaves errno as set by read().
 *
 * Returns:
 *   n   the buffer was filled completely (for n == 0 this is 0 and read() is
 *       never called, so a 0 result only means EOF when n > 0),
 *   0   end-of-file was reported before a single byte was read,
 *  -1   on error, with errno set. End-of-file in the middle of the requested
 *       range is reported as -1 with errno = EIO, because the caller asked
 *       for a complete record and only received part of it.
 */
static ssize_t read_exact(int fd, void *buf, size_t n)
{
    char *cursor = buf;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t r = read(fd, cursor, remaining);

        if (r < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }

        if (r == 0) {
            // EOF: clean only if nothing of this record has been consumed yet.
            if (remaining == n) {
                return 0;
            }
            errno = EIO;
            return -1;
        }

        cursor += r;
        remaining -= (size_t)r;
    }

    return (ssize_t)n;
}
67+
68+
/*
 * Write exactly n bytes from buf to fd, looping over short writes.
 *
 * A write() that fails with EINTR is retried; any other failure aborts the
 * transfer and leaves errno as set by write(). A write() that reports zero
 * bytes while data is still pending is treated as an error (errno = EIO) so
 * the loop cannot spin forever without making progress.
 *
 * Returns n once every byte has been written (0 for n == 0, in which case
 * write() is never called), or -1 on error with errno set.
 */
static ssize_t write_all(int fd, const void *buf, size_t n)
{
    const char *cursor = buf;
    size_t remaining = n;

    while (remaining > 0) {
        ssize_t w = write(fd, cursor, remaining);

        if (w < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }

        if (w == 0) {
            // No progress and no error code from the kernel: report it as EIO.
            errno = EIO;
            return -1;
        }

        cursor += w;
        remaining -= (size_t)w;
    }

    return (ssize_t)n;
}
91+
3492
/*
3593
* Forward ethernet packets to/from the host vsock providing network access and
3694
* the guest TAP device routing application network traffic.
@@ -53,7 +111,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
53111
sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
54112
if (sock_fd < 0) {
55113
perror("creating INET socket to get TAP MTU");
56-
return -errno;
114+
return -1;
57115
}
58116

59117
memset(&ifr, 0, sizeof(struct ifreq));
@@ -63,22 +121,24 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
63121
if (ret < 0) {
64122
close(sock_fd);
65123
perror("fetch MTU of TAP device");
66-
exit(-errno);
124+
return -1;
67125
}
68126

69127
close(sock_fd);
70128

71-
buf = (unsigned char *)malloc(ifr.ifr_mtu);
129+
uint32_t eth_frame_size = ifr.ifr_mtu + ETH_HEADER_LEN;
130+
buf = (unsigned char *)malloc(eth_frame_size);
72131
if (buf == NULL) {
73132
perror("allocate buffer for TAP/vsock communication");
74-
exit(-1);
133+
return -1;
75134
}
76135

77-
// Forward the MTU to the host for it to allocate a corresponding buffer.
78-
ret = write(vsock_fd, (void *)&ifr.ifr_mtu, sizeof(int));
79-
if (ret < sizeof(int)) {
80-
perror("write TAP device MTU to host");
81-
exit(-errno);
136+
// Forward the max ethernet frame size to the host for it to allocate a
137+
// corresponding buffer.
138+
if (write_all(vsock_fd, &eth_frame_size, sizeof(eth_frame_size)) < 0) {
139+
perror("write max ethernet frame size to host");
140+
free(buf);
141+
return -1;
82142
}
83143

84144
pfds[0].fd = vsock_fd;
@@ -97,25 +157,81 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
97157
event_found = false;
98158
// Event on vsock. Read the frame and write it to the TAP device.
99159
if (pfds[0].revents & POLLIN) {
100-
nread = read(vsock_fd, &sz, 4);
101-
if (nread != 4)
102-
exit(0);
160+
nread = read_exact(vsock_fd, &sz, PROXY_HEADER_LEN);
161+
if (nread == 0) {
162+
// vsock connection closed cleanly
163+
break;
164+
} else if (nread < 0) {
165+
perror("unable to read the proxy header from vsock");
166+
free(buf);
167+
return -1;
168+
}
103169

104-
unsigned int len = htonl(sz);
170+
unsigned int len = ntohl(sz);
171+
if (len > eth_frame_size) {
172+
fprintf(stderr,
173+
"ethernet frame size %u exceeds MTU + header size %u\n",
174+
len, eth_frame_size);
175+
free(buf);
176+
return -1;
177+
}
178+
179+
nread = read_exact(vsock_fd, buf, len);
180+
if (nread != (ssize_t)len) {
181+
if (nread == 0)
182+
errno = EIO;
183+
184+
perror("failed to read the ethernet frame from vsock");
185+
free(buf);
186+
return -1;
187+
}
105188

106-
nread = read(vsock_fd, buf, len);
107-
write(tun_fd, buf, nread);
189+
// TAP devices are expected to write an entire frame at once and not
190+
// do partial writes. Only retry if the syscall is interrupted.
191+
ssize_t bytes_written = 0;
192+
do {
193+
bytes_written = write(tun_fd, buf, nread);
194+
} while (bytes_written < 0 && errno == EINTR);
195+
196+
if (bytes_written != nread) {
197+
// the entire frame wasn't written
198+
if (bytes_written >= 0)
199+
errno = EIO;
200+
201+
perror("unable to write the ethernet frame to the TAP device");
202+
free(buf);
203+
return -1;
204+
}
108205

109206
event_found = true;
110207
}
111208

112209
// Event on the TAP device. Read the frame and write it to the vsock.
113210
if (pfds[1].revents & POLLIN) {
114-
nread = read(tun_fd, buf, ifr.ifr_mtu);
115-
if (nread > 0) {
116-
sz = htonl(nread);
117-
write(vsock_fd, (void *)&sz, 4);
118-
write(vsock_fd, buf, nread);
211+
// TAP devices are expected to read an entire frame at once and not
212+
// do partial reads. Only retry if the syscall is interrupted.
213+
do {
214+
nread = read(tun_fd, buf, eth_frame_size);
215+
} while (nread < 0 && errno == EINTR);
216+
if (nread <= 0) {
217+
if (nread == 0)
218+
errno = EIO;
219+
220+
perror("failed to read the ethernet frame from the TAP device");
221+
free(buf);
222+
return -1;
223+
}
224+
225+
sz = htonl((uint32_t)nread);
226+
if (write_all(vsock_fd, (void *)&sz, PROXY_HEADER_LEN) < 0) {
227+
perror("unable to write the proxy header to vsock");
228+
free(buf);
229+
return -1;
230+
}
231+
if (write_all(vsock_fd, buf, nread) < 0) {
232+
perror("unable to write the ethernet frame to vsock");
233+
free(buf);
234+
return -1;
119235
}
120236

121237
event_found = true;
@@ -132,6 +248,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd,
132248
break;
133249
}
134250

251+
free(buf);
135252
close(vsock_fd);
136253
close(tun_fd);
137254

src/aws_nitro/src/enclave/proxy/proxies/net.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,12 @@ impl DeviceProxy for NetProxy {
9595
let (mut vsock, _) = listener.accept().map_err(Error::VsockAccept)?;
9696

9797
/*
98-
* Upon initial connection, read the MTU size from the enclave and allocate the buffer
98+
* Upon initial connection, read the maximum ethernet frame size (MTU + ethernet header length) from the enclave and allocate the buffer
9999
* accordingly.
100100
*/
101101
let size = {
102102
let mut size_buf = [0u8; size_of::<u32>()];
103-
let _ = vsock.read(&mut size_buf).map_err(Error::VsockRead)?;
103+
vsock.read_exact(&mut size_buf).map_err(Error::VsockRead)?;
104104

105105
u32::from_ne_bytes(size_buf)
106106
};

0 commit comments

Comments
 (0)