You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/06/03 20:35:34 UTC
hive git commit: HIVE-19110: Vectorization: Enabling vectorization
causes TestContribCliDriver udf_example_arraymapstruct.q to produce Wrong
Results (Haifeng Chen, reviewed by Matt McCline)
Repository: hive
Updated Branches:
refs/heads/master 8cb99d680 -> a929d4c40
HIVE-19110: Vectorization: Enabling vectorization causes TestContribCliDriver udf_example_arraymapstruct.q to produce Wrong Results (Haifeng Chen, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a929d4c4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a929d4c4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a929d4c4
Branch: refs/heads/master
Commit: a929d4c40438310de436b4cd227296a34e17d03c
Parents: 8cb99d6
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sun Jun 3 15:35:01 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sun Jun 3 15:35:01 2018 -0500
----------------------------------------------------------------------
.../clientpositive/udf_example_arraymapstruct.q | 1 -
.../vector_udf_example_arraymapstruct.q | 21 +++
.../vector_udf_example_arraymapstruct.q.out | 95 +++++++++++
.../VectorExpressionWriterFactory.java | 58 ++++---
.../TestVectorExpressionWriters.java | 166 +++++++++++++++++++
5 files changed, 315 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a929d4c4/contrib/src/test/queries/clientpositive/udf_example_arraymapstruct.q
----------------------------------------------------------------------
diff --git a/contrib/src/test/queries/clientpositive/udf_example_arraymapstruct.q b/contrib/src/test/queries/clientpositive/udf_example_arraymapstruct.q
index f097958..1d8f6a8 100644
--- a/contrib/src/test/queries/clientpositive/udf_example_arraymapstruct.q
+++ b/contrib/src/test/queries/clientpositive/udf_example_arraymapstruct.q
@@ -1,5 +1,4 @@
--! qt:dataset:src_thrift
--- Suppress vectorization due to known bug. See HIVE-19110.
set hive.vectorized.execution.enabled=false;
set hive.test.vectorized.execution.enabled.override=disable;
http://git-wip-us.apache.org/repos/asf/hive/blob/a929d4c4/contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q
----------------------------------------------------------------------
diff --git a/contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q b/contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q
new file mode 100644
index 0000000..273663a
--- /dev/null
+++ b/contrib/src/test/queries/clientpositive/vector_udf_example_arraymapstruct.q
@@ -0,0 +1,21 @@
+--! qt:dataset:src_thrift
+
+set hive.vectorized.execution.enabled=true;
+set hive.test.vectorized.execution.enabled.override=enable;
+
+add jar ${system:maven.local.repository}/org/apache/hive/hive-contrib/${system:hive.version}/hive-contrib-${system:hive.version}.jar;
+
+CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum';
+CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat';
+CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint';
+
+EXPLAIN
+SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift;
+
+SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift;
+
+DROP TEMPORARY FUNCTION example_arraysum;
+DROP TEMPORARY FUNCTION example_mapconcat;
+DROP TEMPORARY FUNCTION example_structprint;
http://git-wip-us.apache.org/repos/asf/hive/blob/a929d4c4/contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out
----------------------------------------------------------------------
diff --git a/contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out b/contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out
new file mode 100644
index 0000000..75bbdff
--- /dev/null
+++ b/contrib/src/test/results/clientpositive/vector_udf_example_arraymapstruct.q.out
@@ -0,0 +1,95 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: example_arraysum
+POSTHOOK: query: CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: example_arraysum
+PREHOOK: query: CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: example_mapconcat
+POSTHOOK: query: CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: example_mapconcat
+PREHOOK: query: CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: example_structprint
+POSTHOOK: query: CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: example_structprint
+PREHOOK: query: EXPLAIN
+SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src_thrift
+ Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: example_arraysum(lint) (type: double), example_mapconcat(mstringstring) (type: string), example_structprint(lintstring[0]) (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 30700 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_thrift
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT example_arraysum(lint), example_mapconcat(mstringstring), example_structprint(lintstring[0])
+FROM src_thrift
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_thrift
+#### A masked pattern was here ####
+0.0 (key_0:value_0) (0:0)(1:0)(2:0)
+6.0 (key_1:value_1) (0:1)(1:1)(2:1)
+12.0 (key_2:value_2) (0:4)(1:8)(2:2)
+18.0 (key_3:value_3) (0:9)(1:27)(2:3)
+24.0 (key_4:value_4) (0:16)(1:64)(2:4)
+30.0 (key_5:value_5) (0:25)(1:125)(2:5)
+36.0 (key_6:value_6) (0:36)(1:216)(2:6)
+42.0 (key_7:value_7) (0:49)(1:343)(2:7)
+48.0 (key_8:value_8) (0:64)(1:512)(2:8)
+54.0 (key_9:value_9) (0:81)(1:729)(2:9)
+NULL NULL NULL
+PREHOOK: query: DROP TEMPORARY FUNCTION example_arraysum
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: example_arraysum
+POSTHOOK: query: DROP TEMPORARY FUNCTION example_arraysum
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: example_arraysum
+PREHOOK: query: DROP TEMPORARY FUNCTION example_mapconcat
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: example_mapconcat
+POSTHOOK: query: DROP TEMPORARY FUNCTION example_mapconcat
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: example_mapconcat
+PREHOOK: query: DROP TEMPORARY FUNCTION example_structprint
+PREHOOK: type: DROPFUNCTION
+PREHOOK: Output: example_structprint
+POSTHOOK: query: DROP TEMPORARY FUNCTION example_structprint
+POSTHOOK: type: DROPFUNCTION
+POSTHOOK: Output: example_structprint
http://git-wip-us.apache.org/repos/asf/hive/blob/a929d4c4/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
index 55dc461..a086535 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector
import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
@@ -1590,20 +1591,7 @@ public final class VectorExpressionWriterFactory {
@Override
public Object writeValue(ColumnVector column, int row) throws HiveException {
- final StructColumnVector structColVector = (StructColumnVector) column;
- final SettableStructObjectInspector structOI =
- (SettableStructObjectInspector) this.objectInspector;
- final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
- final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
-
- final int fieldSize = fields.size();
- for (int i = 0; i < fieldSize; i++) {
- final StructField structField = fields.get(i);
- final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
- fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
- structOI.setStructFieldData(obj, structField, value);
- }
- return this.obj;
+ return setValueInternal(obj, column, row);
}
@Override
@@ -1612,18 +1600,38 @@ public final class VectorExpressionWriterFactory {
struct = initValue(null);
}
- final StructColumnVector structColVector = (StructColumnVector) column;
- final SettableStructObjectInspector structOI =
- (SettableStructObjectInspector) this.objectInspector;
- final List<? extends StructField> fields = structOI.getAllStructFieldRefs();
- final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ return setValueInternal(struct, column, row);
+ }
- final int fieldSize = fields.size();
- for (int i = 0; i < fieldSize; i++) {
- final StructField structField = fields.get(i);
- final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i],
- fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row);
- structOI.setStructFieldData(struct, structField, value);
+ private Object setValueInternal(final Object struct, ColumnVector colVector, int batchIndex) {
+ if (colVector == null) {
+ // The planner will not include unneeded columns for reading but other parts of execution
+ // may ask for them.
+ return null;
+ }
+
+ final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
+ if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
+ return null;
+ }
+
+ final StructColumnVector structColumnVector = (StructColumnVector) colVector;
+ final StandardStructObjectInspector structInspector =
+ (StandardStructObjectInspector) objectInspector;
+ final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ final int size = fieldTypeInfos.size();
+ final List<? extends StructField> structFields =
+ structInspector.getAllStructFieldRefs();
+
+ for (int i = 0; i < size; i++) {
+ final StructField structField = structFields.get(i);
+ final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
+ final Object value = vectorExtractRow.extractRowColumn(
+ structColumnVector.fields[i],
+ fieldTypeInfo,
+ structField.getFieldObjectInspector(),
+ adjustedIndex);
+ structInspector.setStructFieldData(struct, structField, value);
}
return struct;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a929d4c4/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java
index fe3c91c..0bae9b4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java
@@ -21,16 +21,21 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
import java.util.Random;
import junit.framework.Assert;
+import junit.framework.TestCase;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -44,6 +49,8 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
@@ -379,6 +386,155 @@ public class TestVectorExpressionWriters {
}
}
+ @SuppressWarnings("unchecked")
+ private void testListLong() throws HiveException {
+ LongColumnVector icv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false,
+ 5, new Random(10));
+ ListColumnVector cv = new ListColumnVector(3, icv);
+ cv.init();
+
+ // set the offset and length for the two elements
+ cv.offsets[0] = 0;
+ cv.lengths[0] = 2;
+ cv.offsets[1] = 2;
+ cv.lengths[1] = 3;
+
+ // initialize the integer values
+ for (int i = 0; i < 5; i++) {
+ icv.vector[i] = i;
+ icv.isNull[i] = false;
+ }
+ icv.noNulls = true;
+
+ cv.isNull[0] = false;
+ cv.isNull[1] = false;
+ cv.isNull[2] = true;
+ cv.noNulls = false;
+
+ SettableListObjectInspector listOI =
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+
+ VectorExpressionWriter vew = VectorExpressionWriterFactory.genVectorExpressionWritable(listOI);
+
+ List<Object> values1 = (List<Object>) listOI.create(2);
+ vew.setValue(values1, cv, 0);
+
+ List<Object> values2 = (List<Object>) listOI.create(3);
+ vew.setValue(values2, cv, 1);
+
+ TestCase.assertEquals(values1.size(), 2);
+ TestCase.assertEquals(values2.size(), 3);
+
+ for (int i = 0; i < values1.size(); i++) {
+ IntWritable w = (IntWritable) values1.get(i);
+ TestCase.assertEquals(i, w.get());
+ }
+
+ for (int i = 0; i < values2.size(); i++) {
+ IntWritable w = (IntWritable) values2.get(i);
+ TestCase.assertEquals(2 + i, w.get());
+ }
+
+ List<Object> values3 = (List<Object>) vew.writeValue(cv, 0);
+ TestCase.assertEquals(2, values3.size());
+ for (int i = 0; i < values1.size(); i++) {
+ IntWritable w = (IntWritable) values3.get(i);
+ TestCase.assertEquals(i, w.get());
+ }
+
+ List<Object> values4 = (List<Object>) vew.writeValue(cv, 1);
+ TestCase.assertEquals(3, values4.size());
+ for (int i = 0; i < values2.size(); i++) {
+ IntWritable w = (IntWritable) values4.get(i);
+ TestCase.assertEquals(2 + i, w.get());
+ }
+
+ List<Object> values5 = (List<Object>) vew.writeValue(cv, 2);
+ TestCase.assertNull(values5);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void testMapLong() throws HiveException {
+ LongColumnVector kcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false,
+ 5, new Random(10));
+ LongColumnVector vcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false,
+ 5, new Random(10));
+ MapColumnVector cv = new MapColumnVector(3, kcv, vcv);
+ cv.init();
+
+ // set the offset and length for the two elements
+ cv.offsets[0] = 0;
+ cv.lengths[0] = 2;
+ cv.offsets[1] = 2;
+ cv.lengths[1] = 3;
+
+ // initialize the keys and values
+ for (int i = 0; i < 5; i++) {
+ kcv.vector[i] = i;
+ kcv.isNull[i] = false;
+ }
+ kcv.noNulls = true;
+
+ for (int i = 0; i < 5; i++) {
+ vcv.vector[i] = 5 + i;
+ vcv.isNull[i] = false;
+ }
+ vcv.noNulls = true;
+
+ cv.isNull[0] = false;
+ cv.isNull[1] = false;
+ cv.isNull[2] = true;
+ cv.noNulls = false;
+
+ SettableMapObjectInspector mapOI =
+ ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector,
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+
+ VectorExpressionWriter vew = VectorExpressionWriterFactory.genVectorExpressionWritable(mapOI);
+
+ Map<Object, Object> values1 = (Map<Object, Object>) mapOI.create();
+ vew.setValue(values1, cv, 0);
+
+ Map<Object, Object> values2 = (Map<Object, Object>) mapOI.create();
+ vew.setValue(values2, cv, 1);
+
+ TestCase.assertEquals(2, values1.size());
+ TestCase.assertEquals(3, values2.size());
+
+ for (int i = 0; i < values1.size(); i++) {
+ IntWritable key = new IntWritable(i);
+ IntWritable w = (IntWritable) values1.get(key);
+ TestCase.assertEquals(5 + i, w.get());
+ }
+
+ for (int i = 0; i < values2.size(); i++) {
+ IntWritable key = new IntWritable(2 + i);
+ IntWritable w = (IntWritable) values2.get(key);
+ TestCase.assertEquals(5 + 2 + i, w.get());
+ }
+
+ Map<Object, Object> values3 = (Map<Object, Object>) vew.writeValue(cv, 0);
+ TestCase.assertEquals(2, values3.size());
+ for (int i = 0; i < values1.size(); i++) {
+ IntWritable key = new IntWritable(i);
+ IntWritable w = (IntWritable) values1.get(key);
+ TestCase.assertEquals(5 + i, w.get());
+ }
+
+ Map<Object, Object> values4 = (Map<Object, Object>) vew.writeValue(cv, 1);
+ TestCase.assertEquals(3, values4.size());
+ for (int i = 0; i < values2.size(); i++) {
+ IntWritable key = new IntWritable(2 + i);
+ IntWritable w = (IntWritable) values2.get(key);
+ TestCase.assertEquals(5 + 2 + i, w.get());
+ }
+
+ Map<Object, Object> values5 = (Map<Object, Object>) vew.writeValue(cv, 2);
+ TestCase.assertNull(values5);
+ }
+
@Test
public void testVectorExpressionWriterDouble() throws HiveException {
testWriterDouble(TypeInfoFactory.doubleTypeInfo);
@@ -506,4 +662,14 @@ public class TestVectorExpressionWriters {
public void testVectorExpressionSetterBinary() throws HiveException {
testSetterText(TypeInfoFactory.binaryTypeInfo);
}
+
+ @Test
+ public void testVectorExpressionListLong() throws HiveException {
+ testListLong();
+ }
+
+ @Test
+ public void testVectorExpressionMapLong() throws HiveException {
+ testMapLong();
+ }
}