You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/05/31 07:06:00 UTC
[hive] branch master updated: HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is used for reading a parquet file (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)
This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 937b165d908 HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is used for reading a parquet file (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)
937b165d908 is described below
commit 937b165d908229d6b01f3ffaa064cf442de1d9ec
Author: achennagiri <77...@users.noreply.github.com>
AuthorDate: Tue May 31 00:05:49 2022 -0700
HIVE-22670: ArrayIndexOutOfBoundsException when vectorized reader is used for reading a parquet file (#3328) (Ganesha Shreedhara and Abhay Chennagiri reviewed by Peter Vary)
---
data/files/hive22670.parquet | Bin 0 -> 737 bytes
.../vector/VectorizedPrimitiveColumnReader.java | 134 +++++++++++++--------
.../clientpositive/parquet_vectorization_18.q | 24 ++++
.../llap/parquet_vectorization_18.q.out | 74 ++++++++++++
4 files changed, 179 insertions(+), 53 deletions(-)
diff --git a/data/files/hive22670.parquet b/data/files/hive22670.parquet
new file mode 100644
index 00000000000..2700b6fb711
Binary files /dev/null and b/data/files/hive22670.parquet differ
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
index bb08c278668..db52d6a2964 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
@@ -521,31 +521,37 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
switch (primitiveColumnType.getPrimitiveCategory()) {
case INT:
for (int i = rowId; i < rowId + num; ++i) {
- ((LongColumnVector) column).vector[i] =
- dictionary.readInteger((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((LongColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.readInteger((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((LongColumnVector) column).vector[i] = 0;
+ }
}
}
break;
case BYTE:
for (int i = rowId; i < rowId + num; ++i) {
- ((LongColumnVector) column).vector[i] =
- dictionary.readTinyInt((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((LongColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.readTinyInt((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((LongColumnVector) column).vector[i] = 0;
+ }
}
}
break;
case SHORT:
for (int i = rowId; i < rowId + num; ++i) {
- ((LongColumnVector) column).vector[i] =
- dictionary.readSmallInt((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((LongColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.readSmallInt((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((LongColumnVector) column).vector[i] = 0;
+ }
}
}
break;
@@ -553,74 +559,92 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
DateColumnVector dc = (DateColumnVector) column;
dc.setUsingProlepticCalendar(true);
for (int i = rowId; i < rowId + num; ++i) {
- dc.vector[i] =
- skipProlepticConversion ?
- dictionary.readLong((int) dictionaryIds.vector[i]) :
- CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i]));
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- dc.vector[i] = 0;
+ if (!column.isNull[i]) {
+ dc.vector[i] =
+ skipProlepticConversion ?
+ dictionary.readLong((int) dictionaryIds.vector[i]) :
+ CalendarUtils.convertDateToProleptic((int) dictionary.readLong((int) dictionaryIds.vector[i]));
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ dc.vector[i] = 0;
+ }
}
}
break;
case INTERVAL_YEAR_MONTH:
case LONG:
for (int i = rowId; i < rowId + num; ++i) {
- ((LongColumnVector) column).vector[i] =
- dictionary.readLong((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((LongColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.readLong((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((LongColumnVector) column).vector[i] = 0;
+ }
}
}
break;
case BOOLEAN:
for (int i = rowId; i < rowId + num; ++i) {
- ((LongColumnVector) column).vector[i] =
- dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
+ if (!column.isNull[i]) {
+ ((LongColumnVector) column).vector[i] =
+ dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
+ }
}
break;
case DOUBLE:
for (int i = rowId; i < rowId + num; ++i) {
- ((DoubleColumnVector) column).vector[i] =
- dictionary.readDouble((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((DoubleColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((DoubleColumnVector) column).vector[i] =
+ dictionary.readDouble((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((DoubleColumnVector) column).vector[i] = 0;
+ }
}
}
break;
case BINARY:
for (int i = rowId; i < rowId + num; ++i) {
- ((BytesColumnVector) column)
- .setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
+ if (!column.isNull[i]) {
+ ((BytesColumnVector) column)
+ .setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
+ }
}
break;
case STRING:
for (int i = rowId; i < rowId + num; ++i) {
- ((BytesColumnVector) column)
- .setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
+ if (!column.isNull[i]) {
+ ((BytesColumnVector) column)
+ .setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
+ }
}
break;
case VARCHAR:
for (int i = rowId; i < rowId + num; ++i) {
- ((BytesColumnVector) column)
- .setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
+ if (!column.isNull[i]) {
+ ((BytesColumnVector) column)
+ .setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
+ }
}
break;
case CHAR:
for (int i = rowId; i < rowId + num; ++i) {
- ((BytesColumnVector) column)
- .setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
+ if (!column.isNull[i]) {
+ ((BytesColumnVector) column)
+ .setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
+ }
}
break;
case FLOAT:
for (int i = rowId; i < rowId + num; ++i) {
- ((DoubleColumnVector) column).vector[i] =
- dictionary.readFloat((int) dictionaryIds.vector[i]);
- if (!dictionary.isValid()) {
- setNullValue(column, i);
- ((DoubleColumnVector) column).vector[i] = 0;
+ if (!column.isNull[i]) {
+ ((DoubleColumnVector) column).vector[i] =
+ dictionary.readFloat((int) dictionaryIds.vector[i]);
+ if (!dictionary.isValid()) {
+ setNullValue(column, i);
+ ((DoubleColumnVector) column).vector[i] = 0;
+ }
}
}
break;
@@ -635,11 +659,13 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
fillDecimalPrecisionScale(decimalLogicalType, decimalColumnVector);
for (int i = rowId; i < rowId + num; ++i) {
- decimalData = dictionary.readDecimal((int) dictionaryIds.vector[i]);
- if (dictionary.isValid()) {
- decimalColumnVector.vector[i].set(decimalData, decimalColumnVector.scale);
- } else {
- setNullValue(column, i);
+ if (!column.isNull[i]) {
+ decimalData = dictionary.readDecimal((int) dictionaryIds.vector[i]);
+ if (dictionary.isValid()) {
+ decimalColumnVector.vector[i].set(decimalData, decimalColumnVector.scale);
+ } else {
+ setNullValue(column, i);
+ }
}
}
break;
@@ -647,7 +673,9 @@ public class VectorizedPrimitiveColumnReader extends BaseVectorizedColumnReader
TimestampColumnVector tsc = (TimestampColumnVector) column;
tsc.setUsingProlepticCalendar(true);
for (int i = rowId; i < rowId + num; ++i) {
- tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp());
+ if (!column.isNull[i]) {
+ tsc.set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]).toSqlTimestamp());
+ }
}
break;
case INTERVAL_DAY_TIME:
diff --git a/ql/src/test/queries/clientpositive/parquet_vectorization_18.q b/ql/src/test/queries/clientpositive/parquet_vectorization_18.q
new file mode 100644
index 00000000000..d7d707d5cae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_vectorization_18.q
@@ -0,0 +1,24 @@
+dfs ${system:test.dfs.mkdir} -p ${system:test.tmp.dir}/hive22670;
+dfs -copyFromLocal ../../data/files/hive22670.parquet ${system:test.tmp.dir}/hive22670/;
+dfs -ls ${system:test.tmp.dir}/hive22670/;
+
+drop table if exists test_parquet_na;
+create external table test_parquet_na(
+ x int,
+ y int)
+ stored as parquet
+ location '${system:test.tmp.dir}/hive22670';
+
+set hive.vectorized.execution.enabled=false;
+select * from test_parquet_na;
+select * from test_parquet_na order by y;
+
+set hive.vectorized.execution.enabled=true;
+select * from test_parquet_na;
+
+set hive.vectorized.execution.enabled=true;
+select * from test_parquet_na order by y;
+
+drop table test_parquet_na;
+dfs -ls ${system:test.tmp.dir}/hive22670/;
+dfs -rmr ${system:test.tmp.dir}/hive22670;
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out
new file mode 100644
index 00000000000..49ace72fefe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_18.q.out
@@ -0,0 +1,74 @@
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: drop table if exists test_parquet_na
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists test_parquet_na
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table test_parquet_na(
+ x int,
+ y int)
+ stored as parquet
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_parquet_na
+POSTHOOK: query: create external table test_parquet_na(
+ x int,
+ y int)
+ stored as parquet
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_parquet_na
+PREHOOK: query: select * from test_parquet_na
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+PREHOOK: query: select * from test_parquet_na order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+PREHOOK: query: select * from test_parquet_na
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+PREHOOK: query: select * from test_parquet_na order by y
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_parquet_na order by y
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_parquet_na
+#### A masked pattern was here ####
+NULL 1
+NULL 2
+PREHOOK: query: drop table test_parquet_na
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_parquet_na
+PREHOOK: Output: default@test_parquet_na
+POSTHOOK: query: drop table test_parquet_na
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_parquet_na
+POSTHOOK: Output: default@test_parquet_na
+Found 1 items
+#### A masked pattern was here ####