You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/05/31 07:06:00 UTC

[hive] branch master updated: HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is used for reading a parquet file (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)

This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 937b165d908 HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)
937b165d908 is described below

commit 937b165d908229d6b01f3ffaa064cf442de1d9ec
Author: achennagiri <77...@users.noreply.github.com>
AuthorDate: Tue May 31 00:05:49 2022 -0700

    HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)
---
 data/files/hive22670.parquet                       | Bin 0 -> 737 bytes
 .../vector/VectorizedPrimitiveColumnReader.java    | 134 +++++++++++++--------
 .../clientpositive/parquet_vectorization_18.q      |  24 ++++
 .../llap/parquet_vectorization_18.q.out            |  74 ++++++++++++
 4 files changed, 179 insertions(+), 53 deletions(-)

diff --git a/data/files/hive22670.parquet b/data/files/hive22670.parquet
new file mode 100644
index 00000000000..2700b6fb711
Binary files /dev/null and b/data/files/hive22670.parquet differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
index bb08c278668..db52d6a2964 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
@@ -521,31 +521,37 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
     switch (primitiveColumnType.getPrimitiveCategory()) {
     case INT:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((LongColumnVector) column).vector[i] =
-            dictionary.readInteger((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((LongColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((LongColumnVector) column).vector[i] =
+                  dictionary.readInteger((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((LongColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
     case BYTE:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((LongColumnVector) column).vector[i] =
-            dictionary.readTinyInt((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((LongColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((LongColumnVector) column).vector[i] =
+                  dictionary.readTinyInt((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((LongColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
     case SHORT:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((LongColumnVector) column).vector[i] =
-            dictionary.readSmallInt((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((LongColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((LongColumnVector) column).vector[i] =
+                  dictionary.readSmallInt((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((LongColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
@@ -553,74 +559,92 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
       DateColumnVector dc = (DateColumnVector) column;
       dc.setUsingProlepticCalendar(true);
       for (int i = rowId; i < rowId + num; ++i) {
-        dc.vector[i] =
-            skipProlepticConversion ?
-                dictionary.readLong((int) dictionaryIds.vector[i]) :
-                CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i]));
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          dc.vector[i] = 0;
+        if (!column.isNull[i]) {
+          dc.vector[i] =
+                  skipProlepticConversion ?
+                          dictionary.readLong((int) dictionaryIds.vector[i]) :
+                          CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i]));
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            dc.vector[i] = 0;
+          }
         }
       }
       break;
     case INTERVAL_YEAR_MONTH:
     case LONG:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((LongColumnVector) column).vector[i] =
-            dictionary.readLong((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((LongColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((LongColumnVector) column).vector[i] =
+                  dictionary.readLong((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((LongColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
     case BOOLEAN:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((LongColumnVector) column).vector[i] =
-            dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
+        if (!column.isNull[i]) {
+          ((LongColumnVector) column).vector[i] =
+                  dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
+        }
       }
       break;
     case DOUBLE:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((DoubleColumnVector) column).vector[i] =
-            dictionary.readDouble((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((DoubleColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((DoubleColumnVector) column).vector[i] =
+                  dictionary.readDouble((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((DoubleColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
     case BINARY:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((BytesColumnVector) column)
-            .setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
+        if (!column.isNull[i]) {
+          ((BytesColumnVector) column)
+                  .setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
+        }
       }
       break;
     case STRING:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((BytesColumnVector) column)
-            .setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
+        if (!column.isNull[i]) {
+          ((BytesColumnVector) column)
+                  .setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
+        }
       }
       break;
     case VARCHAR:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((BytesColumnVector) column)
-            .setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
+        if (!column.isNull[i]) {
+          ((BytesColumnVector) column)
+                  .setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
+        }
       }
       break;
     case CHAR:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((BytesColumnVector) column)
-            .setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
+        if (!column.isNull[i]) {
+          ((BytesColumnVector) column)
+                  .setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
+        }
       }
       break;
     case FLOAT:
       for (int i = rowId; i < rowId + num; ++i) {
-        ((DoubleColumnVector) column).vector[i] =
-            dictionary.readFloat((int) dictionaryIds.vector[i]);
-        if (!dictionary.isValid()) {
-          setNullValue(column, i);
-          ((DoubleColumnVector) column).vector[i] = 0;
+        if (!column.isNull[i]) {
+          ((DoubleColumnVector) column).vector[i] =
+                  dictionary.readFloat((int) dictionaryIds.vector[i]);
+          if (!dictionary.isValid()) {
+            setNullValue(column, i);
+            ((DoubleColumnVector) column).vector[i] = 0;
+          }
         }
       }
       break;
@@ -635,11 +659,13 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
       fillDecimalPrecisionScale(decimalLogicalType, decimalColumnVector);
 
       for (int i = rowId; i < rowId + num; ++i) {
-        decimalData = dictionary.readDecimal((int) dictionaryIds.vector[i]);
-        if (dictionary.isValid()) {
-          decimalColumnVector.vector[i].set(decimalData, decimalColumnVector.scale);
-        } else {
-          setNullValue(column, i);
+        if (!column.isNull[i]) {
+          decimalData = dictionary.readDecimal((int) dictionaryIds.vector[i]);
+          if (dictionary.isValid()) {
+            decimalColumnVector.vector[i].set(decimalData, decimalColumnVector.scale);
+          } else {
+            setNullValue(column, i);
+          }
         }
       }
       break;
@@ -647,7 +673,9 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
       TimestampColumnVector tsc = (TimestampColumnVector) column;
       tsc.setUsingProlepticCalendar(true);
       for (int i = rowId; i < rowId + num; ++i) {
-        tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp());
+        if (!column.isNull[i]) {
+          tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp());
+        }
       }
       break;
     case INTERVAL_DAY_TIME:
diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_18.q b/ql/src/test/queries/clientpositive/parquet_vectorization_18.q
new file mode 100644
index 00000000000..d7d707d5cae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_vectorization_18.q
@@ -0,0 +1,24 @@
+dfs ${system:test.dfs.mkdir} -p ${system:test.tmp.dir}/hive22670;
+dfs -copyFromLocal ../../data/files/hive22670.parquet ${system:test.tmp.dir}/hive22670/;
+dfs -ls ${system:test.tmp.dir}/hive22670/;
+
+drop table if exists test_parquet_na;
+create external table test_parquet_na(
+            x int,
+            y int)
+  stored as parquet
+  location '${system:test.tmp.dir}/hive22670';
+
+set hive.vectorized.execution.enabled=false;
+select * from test_parquet_na;
+select * from test_parquet_na order by y;
+
+set hive.vectorized.execution.enabled=true;
+select * from test_parquet_na;
+
+set hive.vectorized.execution.enabled=true;
+select * from test_parquet_na order by y;
+
+drop table test_parquet_na;
+dfs -ls  ${system:test.tmp.dir}/hive22670/;
+dfs -rmr  ${system:test.tmp.dir}/hive22670;
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out
new file mode 100644
index 00000000000..49ace72fefe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out
@@ -0,0 +1,74 @@
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: drop table if exists test_parquet_na
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists test_parquet_na
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table test_parquet_na(
+            x int,
+            y int)
+  stored as parquet
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_parquet_na
+POSTHOOK: query: create external table test_parquet_na(
+            x int,
+            y int)
+  stored as parquet
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_parquet_na
+PREHOOK: query: select * from test_parquet_na
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL	1
+NULL	2
+PREHOOK: query: select * from test_parquet_na order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL	1
+NULL	2
+PREHOOK: query: select * from test_parquet_na
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL	1
+NULL	2
+PREHOOK: query: select * from test_parquet_na order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL	1
+NULL	2
+PREHOOK: query: drop table test_parquet_na
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_parquet_na
+PREHOOK: Output: default@test_parquet_na
+POSTHOOK: query: drop table test_parquet_na
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_parquet_na
+POSTHOOK: Output: default@test_parquet_na
+Found 1 items
+#### A masked pattern was here ####