Skip to content

Commit 7b50fe6

Browse files
author
Yicong Huang
committed
fix: make sure capacity is not exceeded
1 parent 230d184 commit 7b50fe6

2 files changed

Lines changed: 37 additions & 6 deletions

File tree

vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,9 @@ public BaseLargeVariableWidthVector(Field field, final BufferAllocator allocator
7171
lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
7272
valueCount = 0;
7373
lastSet = -1;
74-
offsetBuffer = allocator.getEmpty();
74+
// Allocate offset buffer with at least OFFSET_WIDTH capacity to ensure
75+
// offset[0] is always available according to Arrow spec.
76+
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
7577
validityBuffer = allocator.getEmpty();
7678
valueBuffer = allocator.getEmpty();
7779
}
@@ -383,7 +385,19 @@ private void setReaderAndWriterIndex() {
383385
// Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers
384386
// in other libraries. According to Arrow spec, we should still output the offset buffer which
385387
// is [0].
386-
offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
388+
final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
389+
if (offsetBuffer.capacity() < requiredOffsetBufferSize) {
390+
// Allocate a new buffer with sufficient capacity. This can happen when vector
391+
// was loaded via loadFieldBuffers() with an empty offset buffer.
392+
ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize);
393+
// Copy existing data if any
394+
if (offsetBuffer.capacity() > 0) {
395+
newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
396+
}
397+
offsetBuffer.getReferenceManager().release();
398+
offsetBuffer = newOffsetBuffer;
399+
}
400+
offsetBuffer.writerIndex(requiredOffsetBufferSize);
387401
}
388402

389403
/** Same as {@link #allocateNewSafe()}. */
@@ -495,7 +509,9 @@ private void allocateBytes(final long valueBufferSize, final int valueCount) {
495509

496510
/* allocate offset buffer */
497511
private ArrowBuf allocateOffsetBuffer(final long size) {
498-
ArrowBuf offsetBuffer = allocator.buffer(size);
512+
// Ensure at least OFFSET_WIDTH capacity according to Arrow spec
513+
final long curSize = Math.max(size, OFFSET_WIDTH);
514+
ArrowBuf offsetBuffer = allocator.buffer(curSize);
499515
offsetBuffer.readerIndex(0);
500516
offsetBuffer.setZero(0, offsetBuffer.capacity());
501517
return offsetBuffer;

vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,9 @@ public BaseVariableWidthVector(Field field, final BufferAllocator allocator) {
6969
lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
7070
valueCount = 0;
7171
lastSet = -1;
72-
offsetBuffer = allocator.getEmpty();
72+
// Allocate offset buffer with at least OFFSET_WIDTH capacity to ensure
73+
// offset[0] is always available according to Arrow spec.
74+
offsetBuffer = allocateOffsetBuffer(OFFSET_WIDTH);
7375
validityBuffer = allocator.getEmpty();
7476
valueBuffer = allocator.getEmpty();
7577
}
@@ -399,7 +401,19 @@ private void setReaderAndWriterIndex() {
399401
// Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers
400402
// in other libraries. According to Arrow spec, we should still output the offset buffer which
401403
// is [0].
402-
offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
404+
final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
405+
if (offsetBuffer.capacity() < requiredOffsetBufferSize) {
406+
// Allocate a new buffer with sufficient capacity. This can happen when vector
407+
// was loaded via loadFieldBuffers() with an empty offset buffer.
408+
ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize);
409+
// Copy existing data if any
410+
if (offsetBuffer.capacity() > 0) {
411+
newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
412+
}
413+
offsetBuffer.getReferenceManager().release();
414+
offsetBuffer = newOffsetBuffer;
415+
}
416+
offsetBuffer.writerIndex(requiredOffsetBufferSize);
403417
}
404418

405419
/** Same as {@link #allocateNewSafe()}. */
@@ -512,7 +526,8 @@ private void allocateBytes(final long valueBufferSize, final int valueCount) {
512526

513527
/* allocate offset buffer */
514528
private ArrowBuf allocateOffsetBuffer(final long size) {
515-
final int curSize = (int) size;
529+
// Ensure at least OFFSET_WIDTH capacity according to Arrow spec
530+
final int curSize = (int) Math.max(size, OFFSET_WIDTH);
516531
ArrowBuf offsetBuffer = allocator.buffer(curSize);
517532
offsetBuffer.readerIndex(0);
518533
offsetBuffer.setZero(0, offsetBuffer.capacity());

0 commit comments

Comments
 (0)