Posted to commits@hive.apache.org by ma...@apache.org on 2020/03/19 02:41:06 UTC

[hive] branch master updated: HIVE-23034 : Arrow serializer should not keep the reference of arrow offset and validity buffers. (Shubham Chaurasia, reviewed by Thejas Nair)

This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 6f9ae63  HIVE-23034 : Arrow serializer should not keep the reference of arrow offset and validity buffers. (Shubham Chaurasia, reviewed by Thejas Nair)
6f9ae63 is described below

commit 6f9ae63e78eb0d529f3cd96f51f93247783265bc
Author: Shubham Chaurasia <sc...@cloudera.com>
AuthorDate: Thu Mar 19 08:08:53 2020 +0530

    HIVE-23034 : Arrow serializer should not keep the reference of arrow offset and validity buffers. (Shubham Chaurasia, reviewed by Thejas Nair)
    
    Signed-off-by: Mahesh Kumar Behera <ma...@apache.org>
---
 .../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 47 ++++++++++++++++++++++
 ...iniLlap.java => BaseJdbcWithMiniLlap.java.orig} |  0
 .../apache/hadoop/hive/ql/io/arrow/Serializer.java | 17 ++++----
 3 files changed, 54 insertions(+), 10 deletions(-)
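
For context, here is a minimal sketch (not part of the commit) of the hazard the patch addresses. The names mirror the Serializer.java hunks below (arrowVector is the Arrow list vector, OFFSET_WIDTH, hiveVector and size as in the surrounding method; selected-row handling omitted). The exact reallocation path is an assumption, but the general point is that mutating calls such as arrowVector.setNotNull() may grow and replace the vector's buffers, releasing any ArrowBuf handle fetched earlier:

    // Problematic pattern: fetch the ArrowBuf once and keep using the cached handle
    // while the vector is still being mutated.
    final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
    int nextOffset = 0;
    for (int rowIndex = 0; rowIndex < size; rowIndex++) {
      offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);  // cached buffer may already be released:
                                                                 // IllegalReferenceCountException: refCnt: 0
      nextOffset += (int) hiveVector.lengths[rowIndex];
      arrowVector.setNotNull(rowIndex);                          // may reallocate the validity/offset buffers
    }

    // Fix applied in this patch: re-read the live buffer from the vector on every access,
    // so each write goes to the buffer the vector currently owns.
    for (int rowIndex = 0; rowIndex < size; rowIndex++) {
      arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
      nextOffset += (int) hiveVector.lengths[rowIndex];
      arrowVector.setNotNull(rowIndex);
    }

The new testInvalidReferenceCountScenario test below builds a deeply nested array<struct<...>> table large enough to force such buffer growth and verifies that a full select * no longer fails with IllegalReferenceCountException.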

diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
index 4c46db9..3973ec9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
@@ -360,6 +360,53 @@ public abstract class BaseJdbcWithMiniLlap {
     assertEquals(0, rowCount);
   }
 
+  @Test(timeout = 300000)
+  public void testInvalidReferenceCountScenario() throws Exception {
+    final String tableName = "testInvalidReferenceCountScenario";
+    try (Statement stmt = hs2Conn.createStatement()) {
+      String createQuery =
+          "create table " + tableName +
+              "(arr1 array<struct<f1:string,f2:string,arr2:array<struct<f3:string,f4:string,f5:string>>>>, "
+              + "c2 int) STORED AS ORC";
+
+      // create table
+      stmt.execute("DROP TABLE IF EXISTS " + tableName);
+      stmt.execute(createQuery);
+      // load data
+      stmt.execute("INSERT INTO " + tableName + "  VALUES "
+          // value 1
+          + "(ARRAY(NAMED_STRUCT('f1','a1', "
+          + "'f2','a2', "
+          + "'arr2',"
+          + " ARRAY("
+          + "NAMED_STRUCT('f3', cast(null as string), 'f4', cast(null as string), 'f5', cast(null as string)))), "
+          + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+          + "ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', 'f4', 'field4', 'f5', 'field5'))), "
+          + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', ARRAY(NAMED_STRUCT('f3', cast(null as string), "
+          + "'f4', cast(null as string), 'f5', cast(null as string)))), "
+          + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', "
+          + "'f4', 'field4', 'f5', 'field5'))), NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+          + "ARRAY(NAMED_STRUCT('f3', cast(null as string), 'f4', cast(null as string), 'f5', cast(null as string)))),"
+          + " NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+          + "ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', 'f4', 'field4', 'f5', 'field5')))), 1)");
+
+      // generate 16384 rows from above records
+      for (int i = 0; i < 14; i++) {
+        stmt.execute(String.format("insert into %s select * from %s", tableName, tableName));
+      }
+      // validate test table
+      ResultSet res = stmt.executeQuery("SELECT count(*) FROM " + tableName);
+      assertTrue(res.next());
+      assertEquals(16384, res.getInt(1));
+      res.close();
+    }
+    // should not throw - IllegalReferenceCountException: refCnt: 0
+    RowCollector rowCollector = new RowCollector();
+    String query = "select * from " + tableName;
+    int rowCount = processQuery(query, 1, rowCollector);
+    assertEquals(16384, rowCount);
+
+  }
 
   @Test(timeout = 60000)
   public void testNonAsciiStrings() throws Exception {
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
similarity index 100%
copy from itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
copy to itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 0350977..12da074 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -313,16 +313,15 @@ public class Serializer {
 
     write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative);
 
-    final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
     for (int rowIndex = 0; rowIndex < size; rowIndex++) {
       int selectedIndex = rowIndex;
       if (vectorizedRowBatch.selectedInUse) {
         selectedIndex = vectorizedRowBatch.selected[rowIndex];
       }
       if (hiveVector.isNull[selectedIndex]) {
-        BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
+        BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
       } else {
-        BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
+        BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
       }
     }
   }
@@ -360,12 +359,11 @@ public class Serializer {
       write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
     }
 
-    final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
     for (int rowIndex = 0; rowIndex < size; rowIndex++) {
       if (hiveVector.isNull[rowIndex]) {
-        BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
+        BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
       } else {
-        BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
+        BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
       }
     }
   }
@@ -426,7 +424,6 @@ public class Serializer {
 
     write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
 
-    final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
     int nextOffset = 0;
 
     for (int rowIndex = 0; rowIndex < size; rowIndex++) {
@@ -435,14 +432,14 @@ public class Serializer {
         selectedIndex = vectorizedRowBatch.selected[rowIndex];
       }
       if (hiveVector.isNull[selectedIndex]) {
-        offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
+        arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
       } else {
-        offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
+        arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
         nextOffset += (int) hiveVector.lengths[selectedIndex];
         arrowVector.setNotNull(rowIndex);
       }
     }
-    offsetBuffer.setInt(size * OFFSET_WIDTH, nextOffset);
+    arrowVector.getOffsetBuffer().setInt(size * OFFSET_WIDTH, nextOffset);
   }
 
   //Handle cases for both internally constructed