You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ma...@apache.org on 2020/03/19 02:41:06 UTC
[hive] branch master updated: HIVE-23034 : Arrow serializer should
not keep the reference of arrow offset and validity buffers. (Shubham
Chaurasia, reviewed by Thejas Nair)
This is an automated email from the ASF dual-hosted git repository.
mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 6f9ae63 HIVE-23034 : Arrow serializer should not keep the reference of arrow offset and validity buffers. (Shubham Chaurasia, reviewed by Thejas Nair)
6f9ae63 is described below
commit 6f9ae63e78eb0d529f3cd96f51f93247783265bc
Author: Shubham Chaurasia <sc...@cloudera.com>
AuthorDate: Thu Mar 19 08:08:53 2020 +0530
HIVE-23034 : Arrow serializer should not keep the reference of arrow offset and validity buffers. (Shubham Chaurasia, reviewed by Thejas Nair)
Signed-off-by: Mahesh Kumar Behera <ma...@apache.org>
---
.../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 47 ++++++++++++++++++++++
...iniLlap.java => BaseJdbcWithMiniLlap.java.orig} | 0
.../apache/hadoop/hive/ql/io/arrow/Serializer.java | 17 ++++----
3 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
index 4c46db9..3973ec9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
@@ -360,6 +360,53 @@ public abstract class BaseJdbcWithMiniLlap {
assertEquals(0, rowCount);
}
+ @Test(timeout = 300000)
+ public void testInvalidReferenceCountScenario() throws Exception {
+ final String tableName = "testInvalidReferenceCountScenario";
+ try (Statement stmt = hs2Conn.createStatement()) {
+ String createQuery =
+ "create table " + tableName +
+ "(arr1 array<struct<f1:string,f2:string,arr2:array<struct<f3:string,f4:string,f5:string>>>>, "
+ + "c2 int) STORED AS ORC";
+
+ // create table
+ stmt.execute("DROP TABLE IF EXISTS " + tableName);
+ stmt.execute(createQuery);
+ // load data
+ stmt.execute("INSERT INTO " + tableName + " VALUES "
+ // value 1
+ + "(ARRAY(NAMED_STRUCT('f1','a1', "
+ + "'f2','a2', "
+ + "'arr2',"
+ + " ARRAY("
+ + "NAMED_STRUCT('f3', cast(null as string), 'f4', cast(null as string), 'f5', cast(null as string)))), "
+ + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+ + "ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', 'f4', 'field4', 'f5', 'field5'))), "
+ + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', ARRAY(NAMED_STRUCT('f3', cast(null as string), "
+ + "'f4', cast(null as string), 'f5', cast(null as string)))), "
+ + "NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', "
+ + "'f4', 'field4', 'f5', 'field5'))), NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+ + "ARRAY(NAMED_STRUCT('f3', cast(null as string), 'f4', cast(null as string), 'f5', cast(null as string)))),"
+ + " NAMED_STRUCT('f1','a1', 'f2','a2', 'arr2', "
+ + "ARRAY(NAMED_STRUCT('f3', 'fielddddddd3333333', 'f4', 'field4', 'f5', 'field5')))), 1)");
+
+ // generate 16384 (2^14) rows by doubling the single record inserted above 14 times
+ for (int i = 0; i < 14; i++) {
+ stmt.execute(String.format("insert into %s select * from %s", tableName, tableName));
+ }
+ // validate test table
+ ResultSet res = stmt.executeQuery("SELECT count(*) FROM " + tableName);
+ assertTrue(res.next());
+ assertEquals(16384, res.getInt(1));
+ res.close();
+ }
+ // reading every row back should not throw IllegalReferenceCountException: refCnt: 0
+ RowCollector rowCollector = new RowCollector();
+ String query = "select * from " + tableName;
+ int rowCount = processQuery(query, 1, rowCollector);
+ assertEquals(16384, rowCount);
+
+ }
@Test(timeout = 60000)
public void testNonAsciiStrings() throws Exception {
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
similarity index 100%
copy from itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
copy to itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
index 0350977..12da074 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/arrow/Serializer.java
@@ -313,16 +313,15 @@ public class Serializer {
write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative);
- final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
int selectedIndex = rowIndex;
if (vectorizedRowBatch.selectedInUse) {
selectedIndex = vectorizedRowBatch.selected[rowIndex];
}
if (hiveVector.isNull[selectedIndex]) {
- BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
+ BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
} else {
- BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
+ BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
}
}
}
@@ -360,12 +359,11 @@ public class Serializer {
write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
}
- final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
if (hiveVector.isNull[rowIndex]) {
- BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
+ BitVectorHelper.setValidityBit(arrowVector.getValidityBuffer(), rowIndex, 0);
} else {
- BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
+ BitVectorHelper.setValidityBitToOne(arrowVector.getValidityBuffer(), rowIndex);
}
}
}
@@ -426,7 +424,6 @@ public class Serializer {
write(arrowElementVector, hiveElementVector, elementTypeInfo, correctedSize, correctedVrb, isNative);
- final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
int nextOffset = 0;
for (int rowIndex = 0; rowIndex < size; rowIndex++) {
@@ -435,14 +432,14 @@ public class Serializer {
selectedIndex = vectorizedRowBatch.selected[rowIndex];
}
if (hiveVector.isNull[selectedIndex]) {
- offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
+ arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
} else {
- offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
+ arrowVector.getOffsetBuffer().setInt(rowIndex * OFFSET_WIDTH, nextOffset);
nextOffset += (int) hiveVector.lengths[selectedIndex];
arrowVector.setNotNull(rowIndex);
}
}
- offsetBuffer.setInt(size * OFFSET_WIDTH, nextOffset);
+ arrowVector.getOffsetBuffer().setInt(size * OFFSET_WIDTH, nextOffset);
}
//Handle cases for both internally constructed