Skip to content

Commit daa2ea6

Browse files
xerialclaude
andauthored
Merge commit from fork
Improve readPayload(int) to use gradual memory allocation for large payloads (>64MB). This validates that sufficient data is available before completing allocation, improving robustness when handling untrusted input. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent befe3b9 commit daa2ea6

File tree

2 files changed

+197
-4
lines changed

2 files changed

+197
-4
lines changed

msgpack-core/src/main/java/org/msgpack/core/MessageUnpacker.java

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
import java.io.Closeable;
2727
import java.io.IOException;
2828
import java.math.BigInteger;
29+
import java.util.ArrayList;
30+
import java.util.List;
2931
import java.nio.ByteBuffer;
3032
import java.nio.CharBuffer;
3133
import java.nio.charset.CharacterCodingException;
@@ -150,6 +152,14 @@ public class MessageUnpacker
150152
{
151153
private static final MessageBuffer EMPTY_BUFFER = MessageBuffer.wrap(new byte[0]);
152154

155+
/**
156+
* Threshold for switching from upfront allocation to gradual allocation.
157+
* Payloads up to this size use efficient upfront allocation.
158+
* Payloads exceeding this size use gradual allocation to detect malicious files
159+
* that declare large payload sizes but contain little actual data.
160+
*/
161+
private static final int GRADUAL_ALLOCATION_THRESHOLD = 64 * 1024 * 1024; // 64 MB
162+
153163
private final boolean allowReadingStringAsBinary;
154164
private final boolean allowReadingBinaryAsString;
155165
private final CodingErrorAction actionOnMalformedString;
@@ -1637,18 +1647,85 @@ public void readPayload(byte[] dst)
16371647
* This method allocates a new byte array and consumes specified amount of bytes into the byte array.
16381648
*
16391649
* <p>
1640-
* This method is equivalent to <code>readPayload(new byte[length])</code>.
1650+
* For sizes up to {@link #GRADUAL_ALLOCATION_THRESHOLD}, this method uses efficient upfront allocation.
1651+
* For larger sizes, it uses gradual allocation to protect against malicious files that declare
1652+
* large payload sizes but contain little actual data.
16411653
*
16421654
* @param length number of bytes to be read
16431655
* @return the new byte array
16441656
* @throws IOException when underlying input throws IOException
1657+
* @throws MessageSizeException when the input ends before the declared size is reached (for large payloads)
16451658
*/
16461659
public byte[] readPayload(int length)
16471660
throws IOException
16481661
{
1649-
byte[] newArray = new byte[length];
1650-
readPayload(newArray);
1651-
return newArray;
1662+
if (length <= GRADUAL_ALLOCATION_THRESHOLD) {
1663+
// Small/moderate size: use efficient upfront allocation
1664+
byte[] newArray = new byte[length];
1665+
readPayload(newArray);
1666+
return newArray;
1667+
}
1668+
1669+
// Large declared size: use gradual allocation to protect against malicious files
1670+
return readPayloadGradually(length);
1671+
}
1672+
1673+
/**
1674+
* Read payload gradually, allocating memory only as data becomes available.
1675+
* This method protects against malicious files that declare large payload sizes
1676+
* but contain little actual data.
1677+
*
1678+
* @param declaredLength the declared payload length
1679+
* @return the payload bytes
1680+
* @throws IOException when underlying input throws IOException
1681+
* @throws MessageSizeException when the input ends before the declared size is reached
1682+
*/
1683+
private byte[] readPayloadGradually(int declaredLength)
1684+
throws IOException
1685+
{
1686+
List<byte[]> chunks = new ArrayList<>();
1687+
int totalRead = 0;
1688+
int remaining = declaredLength;
1689+
1690+
while (remaining > 0) {
1691+
int bufferRemaining = buffer.size() - position;
1692+
if (bufferRemaining == 0) {
1693+
// Need more data from input
1694+
MessageBuffer next = in.next();
1695+
if (next == null) {
1696+
// Input ended before we read the declared size
1697+
throw new MessageSizeException(
1698+
String.format("Payload declared %,d bytes but input ended after %,d bytes",
1699+
declaredLength, totalRead),
1700+
declaredLength);
1701+
}
1702+
totalReadBytes += buffer.size();
1703+
buffer = next;
1704+
position = 0;
1705+
bufferRemaining = buffer.size();
1706+
}
1707+
1708+
int toRead = Math.min(remaining, bufferRemaining);
1709+
byte[] chunk = new byte[toRead];
1710+
buffer.getBytes(position, chunk, 0, toRead);
1711+
chunks.add(chunk);
1712+
totalRead += toRead;
1713+
position += toRead;
1714+
remaining -= toRead;
1715+
}
1716+
1717+
// All data verified to exist - combine chunks into result
1718+
if (chunks.size() == 1) {
1719+
return chunks.get(0); // Common case: single chunk, no copy needed
1720+
}
1721+
1722+
byte[] result = new byte[declaredLength];
1723+
int offset = 0;
1724+
for (byte[] chunk : chunks) {
1725+
System.arraycopy(chunk, 0, result, offset, chunk.length);
1726+
offset += chunk.length;
1727+
}
1728+
return result;
16521729
}
16531730

16541731
/**
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package org.msgpack.core
2+
3+
import wvlet.airspec.AirSpec
4+
5+
import java.nio.ByteBuffer
6+
7+
class PayloadSizeLimitTest extends AirSpec:
8+
9+
test("detects malicious EXT32 file with huge declared size but tiny actual data") {
10+
// Craft a malicious EXT32 header:
11+
// 0xC9 = EXT32 format
12+
// 4 bytes = declared length (100MB, which exceeds 64MB threshold)
13+
// 1 byte = extension type
14+
// followed by only 1 byte of actual data (not 100MB)
15+
val declaredLength = 100 * 1024 * 1024 // 100 MB
16+
val buffer = ByteBuffer.allocate(7) // header(1) + length(4) + type(1) + data(1)
17+
buffer.put(0xC9.toByte) // EXT32 format code
18+
buffer.putInt(declaredLength) // declared length (big-endian)
19+
buffer.put(0x01.toByte) // extension type
20+
buffer.put(0x41.toByte) // only 1 byte of actual data
21+
val maliciousData = buffer.array()
22+
23+
val unpacker = MessagePack.newDefaultUnpacker(maliciousData)
24+
25+
// Should throw MessageSizeException because the declared size exceeds actual data
26+
intercept[MessageSizeException] {
27+
unpacker.unpackValue()
28+
}
29+
}
30+
31+
test("detects malicious BIN32 file with huge declared size but tiny actual data") {
32+
// Craft a malicious BIN32 header:
33+
// 0xC6 = BIN32 format
34+
// 4 bytes = declared length (100MB, which exceeds 64MB threshold)
35+
// followed by only 1 byte of actual data (not 100MB)
36+
val declaredLength = 100 * 1024 * 1024 // 100 MB
37+
val buffer = ByteBuffer.allocate(6) // header(1) + length(4) + data(1)
38+
buffer.put(0xC6.toByte) // BIN32 format code
39+
buffer.putInt(declaredLength) // declared length (big-endian)
40+
buffer.put(0x41.toByte) // only 1 byte of actual data
41+
val maliciousData = buffer.array()
42+
43+
val unpacker = MessagePack.newDefaultUnpacker(maliciousData)
44+
45+
// Should throw MessageSizeException because the declared size exceeds actual data
46+
intercept[MessageSizeException] {
47+
unpacker.unpackValue()
48+
}
49+
}
50+
51+
test("legitimate extension data works correctly") {
52+
val packer = MessagePack.newDefaultBufferPacker()
53+
val testData = Array.fill[Byte](1000)(0x42)
54+
packer.packExtensionTypeHeader(0x01.toByte, testData.length)
55+
packer.writePayload(testData)
56+
val msgpack = packer.toByteArray
57+
58+
val unpacker = MessagePack.newDefaultUnpacker(msgpack)
59+
val value = unpacker.unpackValue()
60+
61+
assert(value.isExtensionValue)
62+
assert(value.asExtensionValue().getData.length == 1000)
63+
}
64+
65+
test("legitimate binary data works correctly") {
66+
val packer = MessagePack.newDefaultBufferPacker()
67+
val testData = Array.fill[Byte](1000)(0x42)
68+
packer.packBinaryHeader(testData.length)
69+
packer.writePayload(testData)
70+
val msgpack = packer.toByteArray
71+
72+
val unpacker = MessagePack.newDefaultUnpacker(msgpack)
73+
val value = unpacker.unpackValue()
74+
75+
assert(value.isBinaryValue)
76+
assert(value.asBinaryValue().asByteArray().length == 1000)
77+
}
78+
79+
test("readPayload directly with malicious size throws exception") {
80+
// Test readPayload(int) directly with a malicious input
81+
val declaredLength = 100 * 1024 * 1024 // 100 MB
82+
val buffer = ByteBuffer.allocate(6) // header(1) + length(4) + data(1)
83+
buffer.put(0xC6.toByte) // BIN32 format code
84+
buffer.putInt(declaredLength) // declared length (big-endian)
85+
buffer.put(0x41.toByte) // only 1 byte of actual data
86+
val maliciousData = buffer.array()
87+
88+
val unpacker = MessagePack.newDefaultUnpacker(maliciousData)
89+
90+
// First, unpack the binary header to get the declared length
91+
val len = unpacker.unpackBinaryHeader()
92+
assert(len == declaredLength)
93+
94+
// Then try to read the payload - should throw exception
95+
intercept[MessageSizeException] {
96+
unpacker.readPayload(len)
97+
}
98+
}
99+
100+
test("small payloads under threshold work with upfront allocation") {
101+
// Payloads under 64MB should use the efficient upfront allocation path
102+
val packer = MessagePack.newDefaultBufferPacker()
103+
val testData = Array.fill[Byte](10000)(0x42) // 10KB, well under threshold
104+
packer.packBinaryHeader(testData.length)
105+
packer.writePayload(testData)
106+
val msgpack = packer.toByteArray
107+
108+
val unpacker = MessagePack.newDefaultUnpacker(msgpack)
109+
val len = unpacker.unpackBinaryHeader()
110+
val payload = unpacker.readPayload(len)
111+
112+
assert(payload.length == 10000)
113+
assert(payload.forall(_ == 0x42.toByte))
114+
}
115+
116+
end PayloadSizeLimitTest

0 commit comments

Comments
 (0)