You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2021/07/29 21:40:47 UTC

[hive] branch storage-branch-2.7 updated: HIVE-25400: Move the offset updating in BytesColumnVector to setValPreallocated

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch storage-branch-2.7
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/storage-branch-2.7 by this push:
     new 640a366  HIVE-25400: Move the offset updating in BytesColumnVector to setValPreallocated
640a366 is described below

commit 640a366be25e3fe8e33eb7324b44e6b6ee2db645
Author: Owen O'Malley <oo...@linkedin.com>
AuthorDate: Wed Jul 28 10:41:04 2021 -0700

    HIVE-25400: Move the offset updating in BytesColumnVector to setValPreallocated
    
    This change moves the update to the sharedBufferOffset to setValPreallocated. It
    also means the internal code also needs to call setValPreallocated rather than
    use the direct access to the values.
    
    Fixes #2543
    
    Signed-off-by: Owen O'Malley <oo...@linkedin.com>
---
 .../hive/ql/exec/vector/BytesColumnVector.java     | 34 +++++++++-----------
 .../hive/ql/exec/vector/TestBytesColumnVector.java | 37 ++++++++++++++++++++++
 2 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index 4782661..e541233 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -183,9 +183,7 @@ public class BytesColumnVector extends ColumnVector {
     if (length > 0) {
       System.arraycopy(sourceBuf, start, currentValue, currentOffset, length);
     }
-    vector[elementNum] = currentValue;
-    this.start[elementNum] = currentOffset;
-    this.length[elementNum] = length;
+    setValPreallocated(elementNum, length);
   }
 
   /**
@@ -209,18 +207,18 @@ public class BytesColumnVector extends ColumnVector {
    * Ensures that we have space allocated for the next value, which has size
    * length bytes.
    *
-   * Updates currentValue, currentOffset, and sharedBufferOffset for this value.
+   * Updates currentValue and currentOffset for this value.
    *
-   * Always use before getValPreallocatedBytes, getValPreallocatedStart,
-   * and setValPreallocated.
+   * Always use before getValPreallocatedBytes, getValPreallocatedStart.
+   * setValPreallocated must be called to actually reserve the bytes.
    */
   public void ensureValPreallocated(int length) {
     if ((sharedBufferOffset + length) > sharedBuffer.length) {
-      currentValue = allocateBuffer(length);
+      // sets currentValue and currentOffset
+      allocateBuffer(length);
     } else {
       currentValue = sharedBuffer;
       currentOffset = sharedBufferOffset;
-      sharedBufferOffset += length;
     }
   }
 
@@ -241,6 +239,10 @@ public class BytesColumnVector extends ColumnVector {
     vector[elementNum] = currentValue;
     this.start[elementNum] = currentOffset;
     this.length[elementNum] = length;
+    // If the current value is the shared buffer, move the next offset forward.
+    if (currentValue == sharedBuffer) {
+      sharedBufferOffset += length;
+    }
   }
 
   /**
@@ -259,9 +261,7 @@ public class BytesColumnVector extends ColumnVector {
       byte[] rightSourceBuf, int rightStart, int rightLen) {
     int newLen = leftLen + rightLen;
     ensureValPreallocated(newLen);
-    vector[elementNum] = currentValue;
-    this.start[elementNum] = currentOffset;
-    this.length[elementNum] = newLen;
+    setValPreallocated(elementNum, newLen);
 
     System.arraycopy(leftSourceBuf, leftStart, currentValue, currentOffset, leftLen);
     System.arraycopy(rightSourceBuf, rightStart, currentValue,
@@ -270,9 +270,7 @@ public class BytesColumnVector extends ColumnVector {
 
   /**
    * Allocate/reuse enough buffer space to accommodate next element.
-   * currentOffset is set to the first available byte in the returned array.
-   * If sharedBuffer is used, sharedBufferOffset is updated to point after the
-   * current record.
+   * Sets currentValue and currentOffset to the correct buffer and offset.
    *
    * This uses an exponential increase mechanism to rapidly
    * increase buffer size to enough to hold all data.
@@ -280,9 +278,8 @@ public class BytesColumnVector extends ColumnVector {
    * stabilize.
    *
    * @param nextElemLength size of next element to be added
-   * @return the buffer to use for the next element
    */
-  private byte[] allocateBuffer(int nextElemLength) {
+  private void allocateBuffer(int nextElemLength) {
     // If this is a large value or shared buffer is maxed out, allocate a
     // single use buffer. Assumes that sharedBuffer length and
     // MAX_SIZE_FOR_SHARED_BUFFER are powers of 2.
@@ -290,8 +287,8 @@ public class BytesColumnVector extends ColumnVector {
         sharedBufferOffset + nextElemLength >= MAX_SIZE_FOR_SHARED_BUFFER) {
       // allocate a value for the next value
       ++bufferAllocationCount;
+      currentValue = new byte[nextElemLength];
       currentOffset = 0;
-      return new byte[nextElemLength];
     } else {
 
       // sharedBuffer might still be out of space
@@ -304,9 +301,8 @@ public class BytesColumnVector extends ColumnVector {
         ++bufferAllocationCount;
         sharedBufferOffset = 0;
       }
+      currentValue = sharedBuffer;
       currentOffset = sharedBufferOffset;
-      sharedBufferOffset += nextElemLength;
-      return sharedBuffer;
     }
   }
 
diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
index 031ede7..71694b8 100644
--- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
+++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java
@@ -175,6 +175,43 @@ public class TestBytesColumnVector {
     }
   }
 
+  /**
+   * Make sure that ORC's redact mask won't get into trouble by increasing
+   * the size of the allocation before calling setValPreallocated.
+   */
+  @Test
+  public void testMultipleReallocations() {
+    BytesColumnVector vector = new BytesColumnVector();
+    vector.reset();
+
+    // add a value
+    vector.ensureValPreallocated(1000);
+    byte[] bytes = vector.getValPreallocatedBytes();
+    assertEquals(16 * 1024, bytes.length);
+    assertEquals(0, vector.getValPreallocatedStart());
+    // now ask for more memory
+    vector.ensureValPreallocated(5000);
+    // It shouldn't have needed to resize
+    assertSame(bytes, vector.getValPreallocatedBytes());
+    assertEquals(0, vector.getValPreallocatedStart());
+    // actually use 2000 of the bytes
+    vector.setValPreallocated(0, 2000);
+
+    // add another value
+    vector.ensureValPreallocated(10000);
+    assertSame(bytes, vector.getValPreallocatedBytes());
+    assertEquals(2000, vector.getValPreallocatedStart());
+    vector.setValPreallocated(1, 3000);
+
+    // check to make sure the values are allocated correctly
+    assertSame(bytes, vector.vector[0]);
+    assertEquals(0, vector.start[0]);
+    assertEquals(2000, vector.length[0]);
+    assertSame(bytes, vector.vector[1]);
+    assertEquals(2000, vector.start[1]);
+    assertEquals(3000, vector.length[1]);
+  }
+
   // Write a value to the column vector, and return back the byte buffer used.
   private static byte[] writeToBytesColumnVector(int rowIdx, BytesColumnVector col, int writeSize, int val) {
     col.ensureValPreallocated(writeSize);