Posted to commits@hive.apache.org by mm...@apache.org on 2017/05/11 18:10:31 UTC
[7/7] hive git commit: HIVE-16557: Vectorization: Specialize ReduceSink empty key case (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-16557: Vectorization: Specialize ReduceSink empty key case (Matt McCline, reviewed by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6bfa2491
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6bfa2491
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6bfa2491
Branch: refs/heads/master
Commit: 6bfa2491bfa8b13626632c3b3ba7e87bcb0f04ae
Parents: 162f592
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu May 11 13:10:16 2017 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu May 11 13:10:16 2017 -0500
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../VectorReduceSinkCommonOperator.java | 79 +-
.../VectorReduceSinkEmptyKeyOperator.java | 177 ++
.../VectorReduceSinkLongOperator.java | 3 +-
.../VectorReduceSinkMultiKeyOperator.java | 4 +-
.../VectorReduceSinkObjectHashOperator.java | 126 +-
.../VectorReduceSinkStringOperator.java | 3 +-
.../VectorReduceSinkUniformHashOperator.java | 48 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 193 +-
.../hadoop/hive/ql/plan/ReduceSinkDesc.java | 54 +-
.../hive/ql/plan/VectorReduceSinkDesc.java | 28 +
.../clientpositive/vector_windowing_navfn.q | 134 ++
.../queries/clientpositive/windowing_navfn.q | 2 +
.../llap/llap_vector_nohybridgrace.q.out | 4 +-
.../llap/vector_auto_smb_mapjoin_14.q.out | 2 +-
.../llap/vector_between_columns.q.out | 4 +-
.../clientpositive/llap/vector_between_in.q.out | 6 +-
.../llap/vector_binary_join_groupby.q.out | 2 +-
.../llap/vector_char_simple.q.out | 2 +-
.../clientpositive/llap/vector_coalesce.q.out | 4 +-
.../llap/vector_count_distinct.q.out | 2 +-
.../llap/vector_empty_where.q.out | 8 +-
.../llap/vector_groupby_grouping_id3.q.out | 4 +
.../llap/vector_groupby_mapjoin.q.out | 4 +-
.../clientpositive/llap/vector_inner_join.q.out | 18 +
.../clientpositive/llap/vector_join30.q.out | 8 +-
.../llap/vector_join_part_col_char.q.out | 18 +-
.../llap/vector_leftsemi_mapjoin.q.out | 282 +++
.../clientpositive/llap/vector_order_null.q.out | 22 +
.../llap/vector_outer_join0.q.out | 4 +
.../llap/vector_outer_join1.q.out | 12 +-
.../llap/vector_outer_join2.q.out | 8 +-
.../llap/vector_partition_diff_num_cols.q.out | 10 +-
.../llap/vector_ptf_part_simple.q.out | 39 +
.../llap/vector_tablesample_rows.q.out | 2 +-
.../llap/vector_varchar_simple.q.out | 2 +-
.../llap/vector_windowing_navfn.q.out | 2113 ++++++++++++++++++
.../clientpositive/llap/vectorization_0.q.out | 12 +-
.../llap/vectorization_limit.q.out | 11 +
.../llap/vectorization_short_regress.q.out | 16 +-
.../clientpositive/llap/vectorized_case.q.out | 4 +-
.../llap/vectorized_date_funcs.q.out | 2 +-
.../vectorized_dynamic_semijoin_reduction.q.out | 32 +-
.../llap/vectorized_mapjoin2.q.out | 2 +-
.../clientpositive/llap/vectorized_ptf.q.out | 64 +
.../llap/vectorized_timestamp_funcs.q.out | 2 +-
.../spark/vector_between_in.q.out | 6 +-
.../spark/vector_count_distinct.q.out | 2 +-
.../spark/vector_outer_join1.q.out | 4 +-
.../spark/vector_outer_join2.q.out | 4 +-
.../clientpositive/spark/vectorization_0.q.out | 12 +-
.../spark/vectorization_short_regress.q.out | 16 +-
.../clientpositive/spark/vectorized_case.q.out | 4 +-
.../clientpositive/spark/vectorized_ptf.q.out | 75 +
.../spark/vectorized_timestamp_funcs.q.out | 2 +-
.../tez/vectorization_limit.q.out | 11 +
.../clientpositive/windowing_navfn.q.out | 62 +
57 files changed, 3490 insertions(+), 285 deletions(-)
----------------------------------------------------------------------
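Background: a ReduceSink has an empty key when rows are shuffled with no key columns at all, which is what the new vector_windowing_navfn.q test exercises with an unpartitioned, unordered windowing call. A minimal way to observe the new specialization (assuming vectorization and the native ReduceSink path are enabled, as in the test setup) is:

set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reducesink.new.enabled=true;

-- OVER() with no PARTITION BY / ORDER BY shuffles with an empty key; with this change
-- the plan's "Reduce Sink Vectorization" section reports
-- className: VectorReduceSinkEmptyKeyOperator rather than the ObjectHash variant.
explain vectorization detail
select row_number() over() from src where key = '238';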
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 7510ddc..e2c3992 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -639,6 +639,7 @@ minillaplocal.query.files=acid_globallimit.q,\
vector_partitioned_date_time.q,\
vector_ptf_part_simple.q,\
vector_udf1.q,\
+ vector_windowing_navfn.q,\
vectorization_short_regress.q,\
vectorized_dynamic_partition_pruning.q,\
vectorized_dynamic_semijoin_reduction.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java
index a4dbe0b..99819cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkCommonOperator.java
@@ -87,6 +87,7 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re
// This is map of which vectorized row batch columns are the key columns.
// And, their types.
+ protected boolean isEmptyKey;
protected int[] reduceSinkKeyColumnMap;
protected TypeInfo[] reduceSinkKeyTypeInfos;
@@ -95,6 +96,7 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re
// This is map of which vectorized row batch columns are the value columns.
// And, their types.
+ protected boolean isEmptyValue;
protected int[] reduceSinkValueColumnMap;
protected TypeInfo[] reduceSinkValueTypeInfos;
@@ -166,15 +168,21 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re
vectorReduceSinkInfo = vectorDesc.getVectorReduceSinkInfo();
this.vContext = vContext;
- // Since a key expression can be a calculation and the key will go into a scratch column,
- // we need the mapping and type information.
- reduceSinkKeyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap();
- reduceSinkKeyTypeInfos = vectorReduceSinkInfo.getReduceSinkKeyTypeInfos();
- reduceSinkKeyExpressions = vectorReduceSinkInfo.getReduceSinkKeyExpressions();
+ isEmptyKey = vectorDesc.getIsEmptyKey();
+ if (!isEmptyKey) {
+ // Since a key expression can be a calculation and the key will go into a scratch column,
+ // we need the mapping and type information.
+ reduceSinkKeyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap();
+ reduceSinkKeyTypeInfos = vectorReduceSinkInfo.getReduceSinkKeyTypeInfos();
+ reduceSinkKeyExpressions = vectorReduceSinkInfo.getReduceSinkKeyExpressions();
+ }
- reduceSinkValueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap();
- reduceSinkValueTypeInfos = vectorReduceSinkInfo.getReduceSinkValueTypeInfos();
- reduceSinkValueExpressions = vectorReduceSinkInfo.getReduceSinkValueExpressions();
+ isEmptyValue = vectorDesc.getIsEmptyValue();
+ if (!isEmptyValue) {
+ reduceSinkValueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap();
+ reduceSinkValueTypeInfos = vectorReduceSinkInfo.getReduceSinkValueTypeInfos();
+ reduceSinkValueExpressions = vectorReduceSinkInfo.getReduceSinkValueExpressions();
+ }
}
// Get the sort order
@@ -311,26 +319,33 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re
LOG.info("Using tag = " + (int) reduceTagByte);
}
- TableDesc keyTableDesc = conf.getKeySerializeInfo();
- boolean[] columnSortOrder =
- getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
- byte[] columnNullMarker =
- getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
- byte[] columnNotNullMarker =
- getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
-
- keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrder,
- columnNullMarker, columnNotNullMarker);
+ if (!isEmptyKey) {
+ TableDesc keyTableDesc = conf.getKeySerializeInfo();
+ boolean[] columnSortOrder =
+ getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
+ byte[] columnNullMarker =
+ getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
+ byte[] columnNotNullMarker =
+ getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
+
+ keyBinarySortableSerializeWrite =
+ new BinarySortableSerializeWrite(
+ columnSortOrder,
+ columnNullMarker,
+ columnNotNullMarker);
+ }
- valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
+ if (!isEmptyValue) {
+ valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
- valueVectorSerializeRow =
- new VectorSerializeRow<LazyBinarySerializeWrite>(
- valueLazyBinarySerializeWrite);
- valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
+ valueVectorSerializeRow =
+ new VectorSerializeRow<LazyBinarySerializeWrite>(
+ valueLazyBinarySerializeWrite);
+ valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
- valueOutput = new Output();
- valueVectorSerializeRow.setOutput(valueOutput);
+ valueOutput = new Output();
+ valueVectorSerializeRow.setOutput(valueOutput);
+ }
keyWritable = new HiveKey();
@@ -347,6 +362,20 @@ public abstract class VectorReduceSinkCommonOperator extends TerminalOperator<Re
batchCounter = 0;
}
+ protected void initializeEmptyKey(int tag) {
+
+ // Use the same logic as ReduceSinkOperator.toHiveKey.
+ //
+ if (tag == -1 || reduceSkipTag) {
+ keyWritable.setSize(0);
+ } else {
+ keyWritable.setSize(1);
+ keyWritable.get()[0] = reduceTagByte;
+ }
+ keyWritable.setDistKeyLength(0);
+ keyWritable.setHashCode(0);
+ }
+
// The collect method override for TopNHash.BinaryCollector
@Override
public void collect(byte[] key, byte[] value, int hash) throws IOException {
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkEmptyKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkEmptyKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkEmptyKeyOperator.java
new file mode 100644
index 0000000..bb7d677
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkEmptyKeyOperator.java
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.reducesink;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.Counter;
+import org.apache.hadoop.hive.ql.exec.TerminalOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.keyseries.VectorKeySeriesSerialized;
+import org.apache.hadoop.hive.ql.io.HiveKey;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hive.common.util.HashCodeUtil;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class is the UniformHash empty key operator class for native vectorized reduce sink.
+ *
+ * Since there is no key, we initialize the keyWritable once with an empty value.
+ */
+public class VectorReduceSinkEmptyKeyOperator extends VectorReduceSinkCommonOperator {
+
+ private static final long serialVersionUID = 1L;
+ private static final String CLASS_NAME = VectorReduceSinkEmptyKeyOperator.class.getName();
+ private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ private transient boolean isKeyInitialized;
+
+ /** Kryo ctor. */
+ protected VectorReduceSinkEmptyKeyOperator() {
+ super();
+ }
+
+ public VectorReduceSinkEmptyKeyOperator(CompilationOpContext ctx) {
+ super(ctx);
+ }
+
+ public VectorReduceSinkEmptyKeyOperator(CompilationOpContext ctx,
+ VectorizationContext vContext, OperatorDesc conf) throws HiveException {
+ super(ctx, vContext, conf);
+
+ LOG.info("VectorReduceSinkEmptyKeyOperator constructor vectorReduceSinkInfo " + vectorReduceSinkInfo);
+
+ }
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ super.initializeOp(hconf);
+
+ isKeyInitialized = false;
+
+ }
+
+ @Override
+ public void process(Object row, int tag) throws HiveException {
+
+ try {
+
+ VectorizedRowBatch batch = (VectorizedRowBatch) row;
+
+ batchCounter++;
+
+ if (batch.size == 0) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
+ }
+ return;
+ }
+
+ if (!isKeyInitialized) {
+ isKeyInitialized = true;
+ Preconditions.checkState(isEmptyKey);
+ initializeEmptyKey(tag);
+ }
+
+ // Perform any value expressions. Results will go into scratch columns.
+ if (reduceSinkValueExpressions != null) {
+ for (VectorExpression ve : reduceSinkValueExpressions) {
+ ve.evaluate(batch);
+ }
+ }
+
+ final int size = batch.size;
+ if (!isEmptyValue) {
+ if (batch.selectedInUse) {
+ int[] selected = batch.selected;
+ for (int logical = 0; logical < size; logical++) {
+ final int batchIndex = selected[logical];
+
+ valueLazyBinarySerializeWrite.reset();
+ valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+ valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+
+ collect(keyWritable, valueBytesWritable);
+ }
+ } else {
+ for (int batchIndex = 0; batchIndex < size; batchIndex++) {
+ valueLazyBinarySerializeWrite.reset();
+ valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+ valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+
+ collect(keyWritable, valueBytesWritable);
+ }
+ }
+ } else {
+
+ // Empty value, too.
+ for (int i = 0; i < size; i++) {
+ collect(keyWritable, valueBytesWritable);
+ }
+ }
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkLongOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkLongOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkLongOperator.java
index 0bc1cd1..84fb9d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkLongOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkLongOperator.java
@@ -30,7 +30,8 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerialize
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/*
- * Specialized class for native vectorized reduce sink that is reducing on a single long key column.
+ * Specialized class for native vectorized reduce sink that is reducing on a Uniform Hash
+ * single long key column.
*/
public class VectorReduceSinkLongOperator extends VectorReduceSinkUniformHashOperator {
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkMultiKeyOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkMultiKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkMultiKeyOperator.java
index 1cca94d..383cc90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkMultiKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkMultiKeyOperator.java
@@ -29,8 +29,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
/*
- * Specialized class for native vectorized reduce sink that is reducing on multiple key columns
- * (or a single non-long / non-string column).
+ * Specialized class for native vectorized reduce sink that is reducing on Uniform Hash
+ * multiple key columns (or a single non-long / non-string column).
*/
public class VectorReduceSinkMultiKeyOperator extends VectorReduceSinkUniformHashOperator {
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
index bd7d6cb..15581ae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkObjectHashOperator.java
@@ -69,7 +69,9 @@ import org.apache.hive.common.util.HashCodeUtil;
import com.google.common.base.Preconditions;
/**
- * This class is uniform hash (common) operator class for native vectorized reduce sink.
+ * This class is the object hash (not Uniform Hash) operator class for native vectorized reduce sink.
+ * It takes the "object" hash code of bucket and/or partition keys (which are often subsets of the
+ * reduce key). If the bucket and partition keys are empty, the hash will be a random number.
*/
public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOperator {
@@ -77,11 +79,13 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
private static final String CLASS_NAME = VectorReduceSinkObjectHashOperator.class.getName();
private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+ protected boolean isEmptyBuckets;
protected int[] reduceSinkBucketColumnMap;
protected TypeInfo[] reduceSinkBucketTypeInfos;
protected VectorExpression[] reduceSinkBucketExpressions;
+ protected boolean isEmptyPartitions;
protected int[] reduceSinkPartitionColumnMap;
protected TypeInfo[] reduceSinkPartitionTypeInfos;
@@ -91,6 +95,8 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
// transient.
//---------------------------------------------------------------------------
+ private transient boolean isKeyInitialized;
+
protected transient Output keyOutput;
protected transient VectorSerializeRow<BinarySortableSerializeWrite> keyVectorSerializeRow;
@@ -124,13 +130,19 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
// This is the Object Hash class variation.
Preconditions.checkState(!vectorReduceSinkInfo.getUseUniformHash());
- reduceSinkBucketColumnMap = vectorReduceSinkInfo.getReduceSinkBucketColumnMap();
- reduceSinkBucketTypeInfos = vectorReduceSinkInfo.getReduceSinkBucketTypeInfos();
- reduceSinkBucketExpressions = vectorReduceSinkInfo.getReduceSinkBucketExpressions();
+ isEmptyBuckets = vectorDesc.getIsEmptyBuckets();
+ if (!isEmptyBuckets) {
+ reduceSinkBucketColumnMap = vectorReduceSinkInfo.getReduceSinkBucketColumnMap();
+ reduceSinkBucketTypeInfos = vectorReduceSinkInfo.getReduceSinkBucketTypeInfos();
+ reduceSinkBucketExpressions = vectorReduceSinkInfo.getReduceSinkBucketExpressions();
+ }
- reduceSinkPartitionColumnMap = vectorReduceSinkInfo.getReduceSinkPartitionColumnMap();
- reduceSinkPartitionTypeInfos = vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos();
- reduceSinkPartitionExpressions = vectorReduceSinkInfo.getReduceSinkPartitionExpressions();
+ isEmptyPartitions = vectorDesc.getIsEmptyPartitions();
+ if (!isEmptyPartitions) {
+ reduceSinkPartitionColumnMap = vectorReduceSinkInfo.getReduceSinkPartitionColumnMap();
+ reduceSinkPartitionTypeInfos = vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos();
+ reduceSinkPartitionExpressions = vectorReduceSinkInfo.getReduceSinkPartitionExpressions();
+ }
}
private ObjectInspector[] getObjectInspectorArray(TypeInfo[] typeInfos) {
@@ -149,31 +161,32 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
- keyOutput = new Output();
- keyBinarySortableSerializeWrite.set(keyOutput);
- keyVectorSerializeRow =
- new VectorSerializeRow<BinarySortableSerializeWrite>(
- keyBinarySortableSerializeWrite);
- keyVectorSerializeRow.init(reduceSinkKeyTypeInfos, reduceSinkKeyColumnMap);
-
- hasBuckets = false;
- isPartitioned = false;
- numBuckets = 0;
-
+ if (!isEmptyKey) {
+
+ // For this variation, we serialize the key without caring whether it is a single Long,
+ // single String, multi-key, etc.
+ keyOutput = new Output();
+ keyBinarySortableSerializeWrite.set(keyOutput);
+ keyVectorSerializeRow =
+ new VectorSerializeRow<BinarySortableSerializeWrite>(
+ keyBinarySortableSerializeWrite);
+ keyVectorSerializeRow.init(reduceSinkKeyTypeInfos, reduceSinkKeyColumnMap);
+ }
+
// Object Hash.
- numBuckets = conf.getNumBuckets();
- hasBuckets = (numBuckets > 0);
+ if (isEmptyBuckets) {
+ numBuckets = 0;
+ } else {
+ numBuckets = conf.getNumBuckets();
- if (hasBuckets) {
bucketObjectInspectors = getObjectInspectorArray(reduceSinkBucketTypeInfos);
bucketVectorExtractRow = new VectorExtractRow();
bucketVectorExtractRow.init(reduceSinkBucketTypeInfos, reduceSinkBucketColumnMap);
bucketFieldValues = new Object[reduceSinkBucketTypeInfos.length];
}
-
- isPartitioned = (conf.getPartitionCols() != null);
- if (!isPartitioned) {
+
+ if (isEmptyPartitions) {
nonPartitionRandom = new Random(12345);
} else {
partitionObjectInspectors = getObjectInspectorArray(reduceSinkPartitionTypeInfos);
@@ -199,6 +212,13 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
return;
}
+ if (!isKeyInitialized) {
+ isKeyInitialized = true;
+ if (isEmptyKey) {
+ initializeEmptyKey(tag);
+ }
+ }
+
// Perform any key expressions. Results will go into scratch columns.
if (reduceSinkKeyExpressions != null) {
for (VectorExpression ve : reduceSinkKeyExpressions) {
@@ -226,21 +246,21 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
ve.evaluate(batch);
}
}
-
+
final boolean selectedInUse = batch.selectedInUse;
int[] selected = batch.selected;
final int size = batch.size;
for (int logical = 0; logical < size; logical++) {
final int batchIndex = (selectedInUse ? selected[logical] : logical);
-
+
final int hashCode;
- if (!hasBuckets) {
- if (!isPartitioned) {
+ if (isEmptyBuckets) {
+ if (isEmptyPartitions) {
hashCode = nonPartitionRandom.nextInt();
} else {
partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
- hashCode =
+ hashCode =
ObjectInspectorUtils.getBucketHashCode(
partitionFieldValues, partitionObjectInspectors);
}
@@ -249,36 +269,40 @@ public class VectorReduceSinkObjectHashOperator extends VectorReduceSinkCommonOp
final int bucketNum =
ObjectInspectorUtils.getBucketNumber(
bucketFieldValues, bucketObjectInspectors, numBuckets);
- if (!isPartitioned) {
+ if (isEmptyPartitions) {
hashCode = nonPartitionRandom.nextInt() * 31 + bucketNum;
} else {
partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
- hashCode =
+ hashCode =
ObjectInspectorUtils.getBucketHashCode(
partitionFieldValues, partitionObjectInspectors) * 31 + bucketNum;
}
}
-
- keyBinarySortableSerializeWrite.reset();
- keyVectorSerializeRow.serializeWrite(batch, batchIndex);
-
- // One serialized key for 1 or more rows for the duplicate keys.
- final int keyLength = keyOutput.getLength();
- if (tag == -1 || reduceSkipTag) {
- keyWritable.set(keyOutput.getData(), 0, keyLength);
- } else {
- keyWritable.setSize(keyLength + 1);
- System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, keyLength);
- keyWritable.get()[keyLength] = reduceTagByte;
+
+ if (!isEmptyKey) {
+ keyBinarySortableSerializeWrite.reset();
+ keyVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+ // One serialized key for 1 or more rows for the duplicate keys.
+ final int keyLength = keyOutput.getLength();
+ if (tag == -1 || reduceSkipTag) {
+ keyWritable.set(keyOutput.getData(), 0, keyLength);
+ } else {
+ keyWritable.setSize(keyLength + 1);
+ System.arraycopy(keyOutput.getData(), 0, keyWritable.get(), 0, keyLength);
+ keyWritable.get()[keyLength] = reduceTagByte;
+ }
+ keyWritable.setDistKeyLength(keyLength);
+ keyWritable.setHashCode(hashCode);
}
- keyWritable.setDistKeyLength(keyLength);
- keyWritable.setHashCode(hashCode);
-
- valueLazyBinarySerializeWrite.reset();
- valueVectorSerializeRow.serializeWrite(batch, batchIndex);
-
- valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
-
+
+ if (!isEmptyValue) {
+ valueLazyBinarySerializeWrite.reset();
+ valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+ valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+ }
+
collect(keyWritable, valueBytesWritable);
}
} catch (Exception e) {
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkStringOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkStringOperator.java
index a838f4c..51e8531 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkStringOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkStringOperator.java
@@ -30,7 +30,8 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerialize
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
/*
- * Specialized class for native vectorized reduce sink that is reducing on a single long key column.
+ * Specialized class for native vectorized reduce sink that is reducing on a Uniform Hash
+ * single string key column.
*/
public class VectorReduceSinkStringOperator extends VectorReduceSinkUniformHashOperator {
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
index 2dfa721..3acae94 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
@@ -66,8 +66,12 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hive.common.util.HashCodeUtil;
+import com.google.common.base.Preconditions;
+
/**
* This class is uniform hash (common) operator class for native vectorized reduce sink.
+ * There are variation operators for Long, String, and MultiKey. And, a special case operator
+ * for no key (VectorReduceSinkEmptyKeyOperator).
*/
public abstract class VectorReduceSinkUniformHashOperator extends VectorReduceSinkCommonOperator {
@@ -105,6 +109,7 @@ public abstract class VectorReduceSinkUniformHashOperator extends VectorReduceSi
protected void initializeOp(Configuration hconf) throws HiveException {
super.initializeOp(hconf);
+ Preconditions.checkState(!isEmptyKey);
// Create all nulls key.
try {
Output nullKeyOutput = new Output();
@@ -155,10 +160,7 @@ public abstract class VectorReduceSinkUniformHashOperator extends VectorReduceSi
boolean selectedInUse = batch.selectedInUse;
int[] selected = batch.selected;
- int keyLength;
int logical;
- int end;
- int batchIndex;
do {
if (serializedKeySeries.getCurrentIsAllNull()) {
@@ -179,7 +181,7 @@ public abstract class VectorReduceSinkUniformHashOperator extends VectorReduceSi
// One serialized key for 1 or more rows for the duplicate keys.
// LOG.info("reduceSkipTag " + reduceSkipTag + " tag " + tag + " reduceTagByte " + (int) reduceTagByte + " keyLength " + serializedKeySeries.getSerializedLength());
// LOG.info("process offset " + serializedKeySeries.getSerializedStart() + " length " + serializedKeySeries.getSerializedLength());
- keyLength = serializedKeySeries.getSerializedLength();
+ final int keyLength = serializedKeySeries.getSerializedLength();
if (tag == -1 || reduceSkipTag) {
keyWritable.set(serializedKeySeries.getSerializedBytes(),
serializedKeySeries.getSerializedStart(), keyLength);
@@ -194,18 +196,38 @@ public abstract class VectorReduceSinkUniformHashOperator extends VectorReduceSi
}
logical = serializedKeySeries.getCurrentLogical();
- end = logical + serializedKeySeries.getCurrentDuplicateCount();
- do {
- batchIndex = (selectedInUse ? selected[logical] : logical);
+ final int end = logical + serializedKeySeries.getCurrentDuplicateCount();
+ if (!isEmptyValue) {
+ if (selectedInUse) {
+ do {
+ final int batchIndex = selected[logical];
+
+ valueLazyBinarySerializeWrite.reset();
+ valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+
+ valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+
+ collect(keyWritable, valueBytesWritable);
+ } while (++logical < end);
+ } else {
+ do {
+ valueLazyBinarySerializeWrite.reset();
+ valueVectorSerializeRow.serializeWrite(batch, logical);
- valueLazyBinarySerializeWrite.reset();
- valueVectorSerializeRow.serializeWrite(batch, batchIndex);
+ valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
- valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
+ collect(keyWritable, valueBytesWritable);
+ } while (++logical < end);
+
+ }
+ } else {
+
+ // Empty value, too.
+ do {
+ collect(keyWritable, valueBytesWritable);
+ } while (++logical < end);
+ }
- collect(keyWritable, valueBytesWritable);
- } while (++logical < end);
-
if (!serializedKeySeries.next()) {
break;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 420e5b8..737aad1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiString
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator;
@@ -2930,13 +2931,15 @@ public class Vectorizer implements PhysicalPlanResolver {
Operator<? extends OperatorDesc> op, VectorizationContext vContext, ReduceSinkDesc desc,
VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
+ VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc();
+
Type[] reduceSinkKeyColumnVectorTypes = vectorReduceSinkInfo.getReduceSinkKeyColumnVectorTypes();
// By default, we can always use the multi-key class.
VectorReduceSinkDesc.ReduceSinkKeyType reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.MULTI_KEY;
// Look for single column optimization.
- if (reduceSinkKeyColumnVectorTypes.length == 1) {
+ if (reduceSinkKeyColumnVectorTypes != null && reduceSinkKeyColumnVectorTypes.length == 1) {
LOG.info("Vectorizer vectorizeOperator groupby typeName " + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()[0]);
Type columnVectorType = reduceSinkKeyColumnVectorTypes[0];
switch (columnVectorType) {
@@ -2968,25 +2971,31 @@ public class Vectorizer implements PhysicalPlanResolver {
Class<? extends Operator<?>> opClass = null;
if (vectorReduceSinkInfo.getUseUniformHash()) {
- switch (reduceSinkKeyType) {
- case LONG:
- opClass = VectorReduceSinkLongOperator.class;
- break;
- case STRING:
- opClass = VectorReduceSinkStringOperator.class;
- break;
- case MULTI_KEY:
- opClass = VectorReduceSinkMultiKeyOperator.class;
- break;
- default:
- throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType);
+ if (vectorDesc.getIsEmptyKey()) {
+ opClass = VectorReduceSinkEmptyKeyOperator.class;
+ } else {
+ switch (reduceSinkKeyType) {
+ case LONG:
+ opClass = VectorReduceSinkLongOperator.class;
+ break;
+ case STRING:
+ opClass = VectorReduceSinkStringOperator.class;
+ break;
+ case MULTI_KEY:
+ opClass = VectorReduceSinkMultiKeyOperator.class;
+ break;
+ default:
+ throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType);
+ }
}
} else {
- opClass = VectorReduceSinkObjectHashOperator.class;
+ if (vectorDesc.getIsEmptyKey() && vectorDesc.getIsEmptyBuckets() && vectorDesc.getIsEmptyPartitions()) {
+ opClass = VectorReduceSinkEmptyKeyOperator.class;
+ } else {
+ opClass = VectorReduceSinkObjectHashOperator.class;
+ }
}
- VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc();
-
vectorDesc.setReduceSinkKeyType(reduceSinkKeyType);
vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
@@ -3044,88 +3053,87 @@ public class Vectorizer implements PhysicalPlanResolver {
// So if we later decide not to specialize, we'll just waste any scratch columns allocated...
List<ExprNodeDesc> keysDescs = desc.getKeyCols();
- VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
-
- // Since a key expression can be a calculation and the key will go into a scratch column,
- // we need the mapping and type information.
- int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
- TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
- Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
- ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
- VectorExpression[] reduceSinkKeyExpressions;
- for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
- VectorExpression ve = allKeyExpressions[i];
- reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
- reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
- reduceSinkKeyColumnVectorTypes[i] =
- VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
- if (!IdentityExpression.isColumnOnly(ve)) {
- groupByKeyExpressionsList.add(ve);
+ final boolean isEmptyKey = (keysDescs.size() == 0);
+ if (!isEmptyKey) {
+
+ VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
+
+ final int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
+ final TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
+ final Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
+ final VectorExpression[] reduceSinkKeyExpressions;
+
+ // Since a key expression can be a calculation and the key will go into a scratch column,
+ // we need the mapping and type information.
+ ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
+ for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
+ VectorExpression ve = allKeyExpressions[i];
+ reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
+ reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
+ reduceSinkKeyColumnVectorTypes[i] =
+ VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
+ if (!IdentityExpression.isColumnOnly(ve)) {
+ groupByKeyExpressionsList.add(ve);
+ }
+ }
+ if (groupByKeyExpressionsList.size() == 0) {
+ reduceSinkKeyExpressions = null;
+ } else {
+ reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
}
- }
- if (groupByKeyExpressionsList.size() == 0) {
- reduceSinkKeyExpressions = null;
- } else {
- reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
+
+ vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
+ vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
+ vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
+ vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
+
}
ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
- VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
-
- int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
- TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
- Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
- ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
- VectorExpression[] reduceSinkValueExpressions;
- for (int i = 0; i < valueDescs.size(); ++i) {
- VectorExpression ve = allValueExpressions[i];
- reduceSinkValueColumnMap[i] = ve.getOutputColumn();
- reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
- reduceSinkValueColumnVectorTypes[i] =
- VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
- if (!IdentityExpression.isColumnOnly(ve)) {
- reduceSinkValueExpressionsList.add(ve);
+ final boolean isEmptyValue = (valueDescs.size() == 0);
+ if (!isEmptyValue) {
+ VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
+
+ final int[] reduceSinkValueColumnMap = new int[allValueExpressions.length];
+ final TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[allValueExpressions.length];
+ final Type[] reduceSinkValueColumnVectorTypes = new Type[allValueExpressions.length];
+ VectorExpression[] reduceSinkValueExpressions;
+
+ ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
+ for (int i = 0; i < valueDescs.size(); ++i) {
+ VectorExpression ve = allValueExpressions[i];
+ reduceSinkValueColumnMap[i] = ve.getOutputColumn();
+ reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
+ reduceSinkValueColumnVectorTypes[i] =
+ VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
+ if (!IdentityExpression.isColumnOnly(ve)) {
+ reduceSinkValueExpressionsList.add(ve);
+ }
+ }
+ if (reduceSinkValueExpressionsList.size() == 0) {
+ reduceSinkValueExpressions = null;
+ } else {
+ reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
}
- }
- if (reduceSinkValueExpressionsList.size() == 0) {
- reduceSinkValueExpressions = null;
- } else {
- reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
- }
- vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
- vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
- vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
- vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
+ vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
+ vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
+ vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
+ vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
- vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
- vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
- vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
- vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
+ }
boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
vectorReduceSinkInfo.setUseUniformHash(useUniformHash);
- boolean hasEmptyBuckets = false;
- boolean hasNoPartitions = false;
- if (useUniformHash) {
-
- // Check for unexpected conditions...
- hasEmptyBuckets =
- (desc.getBucketCols() != null && !desc.getBucketCols().isEmpty()) ||
- (desc.getPartitionCols().size() == 0);
+ List<ExprNodeDesc> bucketDescs = desc.getBucketCols();
+ final boolean isEmptyBuckets = (bucketDescs == null || bucketDescs.size() == 0);
+ List<ExprNodeDesc> partitionDescs = desc.getPartitionCols();
+ final boolean isEmptyPartitions = (partitionDescs == null || partitionDescs.size() == 0);
- if (hasEmptyBuckets) {
- LOG.info("Unexpected condition: UNIFORM hash and empty buckets");
- isUnexpectedCondition = true;
- }
-
- hasNoPartitions = (desc.getPartitionCols() == null);
+ if (useUniformHash || (isEmptyKey && isEmptyBuckets && isEmptyPartitions)) {
- if (hasNoPartitions) {
- LOG.info("Unexpected condition: UNIFORM hash and no partitions");
- isUnexpectedCondition = true;
- }
+ // NOTE: When Uniform Hash is used, or when the key, buckets, and partitions are all empty, there is nothing more to gather here; an empty key will use VectorReduceSinkEmptyKeyOperator instead.
} else {
@@ -3136,10 +3144,9 @@ public class Vectorizer implements PhysicalPlanResolver {
Type[] reduceSinkBucketColumnVectorTypes = null;
VectorExpression[] reduceSinkBucketExpressions = null;
- List<ExprNodeDesc> bucketDescs = desc.getBucketCols();
- if (bucketDescs != null) {
+ if (!isEmptyBuckets) {
VectorExpression[] allBucketExpressions = vContext.getVectorExpressions(bucketDescs);
-
+
reduceSinkBucketColumnMap = new int[bucketDescs.size()];
reduceSinkBucketTypeInfos = new TypeInfo[bucketDescs.size()];
reduceSinkBucketColumnVectorTypes = new Type[bucketDescs.size()];
@@ -3166,10 +3173,9 @@ public class Vectorizer implements PhysicalPlanResolver {
Type[] reduceSinkPartitionColumnVectorTypes = null;
VectorExpression[] reduceSinkPartitionExpressions = null;
- List<ExprNodeDesc> partitionDescs = desc.getPartitionCols();
- if (partitionDescs != null) {
+ if (!isEmptyPartitions) {
VectorExpression[] allPartitionExpressions = vContext.getVectorExpressions(partitionDescs);
-
+
reduceSinkPartitionColumnMap = new int[partitionDescs.size()];
reduceSinkPartitionTypeInfos = new TypeInfo[partitionDescs.size()];
reduceSinkPartitionColumnVectorTypes = new Type[partitionDescs.size()];
@@ -3205,6 +3211,10 @@ public class Vectorizer implements PhysicalPlanResolver {
// Remember the condition variables for EXPLAIN regardless.
vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
vectorDesc.setEngine(engine);
+ vectorDesc.setIsEmptyKey(isEmptyKey);
+ vectorDesc.setIsEmptyValue(isEmptyValue);
+ vectorDesc.setIsEmptyBuckets(isEmptyBuckets);
+ vectorDesc.setIsEmptyPartitions(isEmptyPartitions);
vectorDesc.setHasPTFTopN(hasPTFTopN);
vectorDesc.setHasDistinctColumns(hasDistinctColumns);
vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
@@ -3217,7 +3227,6 @@ public class Vectorizer implements PhysicalPlanResolver {
// Many restrictions.
if (!isVectorizationReduceSinkNativeEnabled ||
!isTezOrSpark ||
- (useUniformHash && (hasEmptyBuckets || hasNoPartitions)) ||
hasPTFTopN ||
hasDistinctColumns ||
!isKeyBinarySortable ||
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
index 0b103b9..24636c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
@@ -522,10 +522,62 @@ public class ReduceSinkDesc extends AbstractOperatorDesc {
return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions());
}
+ @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+ public String getKeyColumns() {
+ if (!isNative) {
+ return null;
+ }
+ int[] keyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap();
+ if (keyColumnMap == null) {
+ // Always show an array.
+ keyColumnMap = new int[0];
+ }
+ return Arrays.toString(keyColumnMap);
+ }
+
+ @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+ public String getValueColumns() {
+ if (!isNative) {
+ return null;
+ }
+ int[] valueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap();
+ if (valueColumnMap == null) {
+ // Always show an array.
+ valueColumnMap = new int[0];
+ }
+ return Arrays.toString(valueColumnMap);
+ }
+
+ @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+ public String getBucketColumns() {
+ if (!isNative) {
+ return null;
+ }
+ int[] bucketColumnMap = vectorReduceSinkInfo.getReduceSinkBucketColumnMap();
+ if (bucketColumnMap == null || bucketColumnMap.length == 0) {
+ // Suppress empty column map.
+ return null;
+ }
+ return Arrays.toString(bucketColumnMap);
+ }
+
+ @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+ public String getPartitionColumns() {
+ if (!isNative) {
+ return null;
+ }
+ int[] partitionColumnMap = vectorReduceSinkInfo.getReduceSinkPartitionColumnMap();
+ if (partitionColumnMap == null || partitionColumnMap.length == 0) {
+ // Suppress empty column map.
+ return null;
+ }
+ return Arrays.toString(partitionColumnMap);
+ }
+
private VectorizationCondition[] createNativeConditions() {
boolean enabled = vectorReduceSinkDesc.getIsVectorizationReduceSinkNativeEnabled();
-
+
String engine = vectorReduceSinkDesc.getEngine();
String engineInSupportedCondName =
HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines;
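The @Explain(vectorization = Vectorization.DETAIL) getters added above print the key, value, bucket, and partition column maps (via Arrays.toString of the int[] maps) for native vectorized reduce sinks: keyColumns and valueColumns always appear, possibly as [], while bucketColumns and partitionColumns are suppressed when empty. One of the new test queries that surfaces them under EXPLAIN VECTORIZATION DETAIL (illustrative; the actual column indexes depend on the compiled plan):

explain vectorization detail
select s, row_number() over (partition by d order by `dec`) from over10k limit 100;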
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java
index 91d5be7..f148eb4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorReduceSinkDesc.java
@@ -64,6 +64,10 @@ public class VectorReduceSinkDesc extends AbstractVectorDesc {
private boolean isVectorizationReduceSinkNativeEnabled;
private String engine;
+ private boolean isEmptyKey;
+ private boolean isEmptyValue;
+ private boolean isEmptyBuckets;
+ private boolean isEmptyPartitions;
private boolean hasPTFTopN;
private boolean hasDistinctColumns;
private boolean isKeyBinarySortable;
@@ -85,6 +89,30 @@ public class VectorReduceSinkDesc extends AbstractVectorDesc {
public String getEngine() {
return engine;
}
+ public void setIsEmptyKey(boolean isEmptyKey) {
+ this.isEmptyKey = isEmptyKey;
+ }
+ public boolean getIsEmptyKey() {
+ return isEmptyKey;
+ }
+ public void setIsEmptyValue(boolean isEmptyValue) {
+ this.isEmptyValue = isEmptyValue;
+ }
+ public boolean getIsEmptyValue() {
+ return isEmptyValue;
+ }
+ public void setIsEmptyBuckets(boolean isEmptyBuckets) {
+ this.isEmptyBuckets = isEmptyBuckets;
+ }
+ public boolean getIsEmptyBuckets() {
+ return isEmptyBuckets;
+ }
+ public void setIsEmptyPartitions(boolean isEmptyPartitions) {
+ this.isEmptyPartitions = isEmptyPartitions;
+ }
+ public boolean getIsEmptyPartitions() {
+ return isEmptyPartitions;
+ }
public void setHasPTFTopN(boolean hasPTFTopN) {
this.hasPTFTopN = hasPTFTopN;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/queries/clientpositive/vector_windowing_navfn.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_windowing_navfn.q b/ql/src/test/queries/clientpositive/vector_windowing_navfn.q
new file mode 100644
index 0000000..9acbe97
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_windowing_navfn.q
@@ -0,0 +1,134 @@
+set hive.explain.user=false;
+set hive.cli.print.header=true;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+drop table over10k;
+
+create table over10k(
+ t tinyint,
+ si smallint,
+ i int,
+ b bigint,
+ f float,
+ d double,
+ bo boolean,
+ s string,
+ ts timestamp,
+ `dec` decimal(4,2),
+ bin binary)
+ row format delimited
+ fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain vectorization detail
+select row_number() over() from src where key = '238';
+
+select row_number() over() from src where key = '238';
+
+explain vectorization detail
+select s, row_number() over (partition by d order by `dec`) from over10k limit 100;
+
+select s, row_number() over (partition by d order by `dec`) from over10k limit 100;
+
+explain vectorization detail
+select i, lead(s) over (partition by bin order by d,i desc) from over10k limit 100;
+
+select i, lead(s) over (partition by bin order by d,i desc) from over10k limit 100;
+
+explain vectorization detail
+select i, lag(`dec`) over (partition by i order by s,i,`dec`) from over10k limit 100;
+
+select i, lag(`dec`) over (partition by i order by s,i,`dec`) from over10k limit 100;
+
+explain vectorization detail
+select s, last_value(t) over (partition by d order by f) from over10k limit 100;
+
+select s, last_value(t) over (partition by d order by f) from over10k limit 100;
+
+explain vectorization detail
+select s, first_value(s) over (partition by bo order by s) from over10k limit 100;
+
+select s, first_value(s) over (partition by bo order by s) from over10k limit 100;
+
+explain vectorization detail
+select t, s, i, last_value(i) over (partition by t order by s)
+from over10k where (s = 'oscar allen' or s = 'oscar carson') and t = 10;
+
+-- select t, s, i, last_value(i) over (partition by t order by s)
+-- from over10k where (s = 'oscar allen' or s = 'oscar carson') and t = 10;
+
+drop table if exists wtest;
+create table wtest as
+select a, b
+from
+(
+SELECT explode(
+ map(
+ 3, array(1,2,3,4,5),
+ 1, array(int(null),int(null),int(null), int(null), int(null)),
+ 2, array(1,null,2, null, 3)
+ )
+ ) as (a,barr) FROM (select * from src limit 1) s
+ ) s1 lateral view explode(barr) arr as b;
+
+explain vectorization detail
+select a, b,
+first_value(b) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+first_value(b) over (partition by a order by b rows between unbounded preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b rows between unbounded preceding and 1 following )
+from wtest;
+
+select a, b,
+first_value(b) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+first_value(b) over (partition by a order by b rows between unbounded preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b rows between unbounded preceding and 1 following )
+from wtest;
+
+explain vectorization detail
+select a, b,
+first_value(b) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+first_value(b) over (partition by a order by b desc rows between unbounded preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b desc rows between unbounded preceding and 1 following )
+from wtest;
+
+select a, b,
+first_value(b) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+first_value(b) over (partition by a order by b desc rows between unbounded preceding and 1 following ) ,
+first_value(b, true) over (partition by a order by b desc rows between unbounded preceding and 1 following )
+from wtest;
+
+explain vectorization detail
+select a, b,
+last_value(b) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+last_value(b) over (partition by a order by b rows between unbounded preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b rows between unbounded preceding and 1 following )
+from wtest;
+
+select a, b,
+last_value(b) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b rows between 1 preceding and 1 following ) ,
+last_value(b) over (partition by a order by b rows between unbounded preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b rows between unbounded preceding and 1 following )
+from wtest;
+
+explain vectorization detail
+select a, b,
+last_value(b) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+last_value(b) over (partition by a order by b desc rows between unbounded preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b desc rows between unbounded preceding and 1 following )
+from wtest;
+
+select a, b,
+last_value(b) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b desc rows between 1 preceding and 1 following ) ,
+last_value(b) over (partition by a order by b desc rows between unbounded preceding and 1 following ) ,
+last_value(b, true) over (partition by a order by b desc rows between unbounded preceding and 1 following )
+from wtest;
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/queries/clientpositive/windowing_navfn.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_navfn.q b/ql/src/test/queries/clientpositive/windowing_navfn.q
index f2ec9fc..7c27e22 100644
--- a/ql/src/test/queries/clientpositive/windowing_navfn.q
+++ b/ql/src/test/queries/clientpositive/windowing_navfn.q
@@ -17,6 +17,8 @@ create table over10k(
load data local inpath '../../data/files/over10k' into table over10k;
+explain select row_number() over() from src where key = '238';
+
select row_number() over() from src where key = '238';
select s, row_number() over (partition by d order by `dec`) from over10k limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
index 57ddc96..d26400d 100644
--- a/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out
@@ -81,7 +81,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -259,7 +259,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
index 3b44bc3..a98c34f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
@@ -285,7 +285,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index d6f6ec8..bcf9cd2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -162,7 +162,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
@@ -329,7 +329,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index ae1b2d2..18dd1c6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -163,7 +163,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -367,7 +367,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
@@ -763,7 +763,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index a35659a..160a43b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -183,7 +183,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
index 5e1dea8..47c709f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
@@ -251,7 +251,7 @@ STAGE PLANS:
className: VectorLimitOperator
native: true
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Execution mode: vectorized, llap
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
index 358d8ae..8402104 100644
--- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
@@ -248,7 +248,7 @@ STAGE PLANS:
native: true
projectedOutputColumns: []
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Execution mode: vectorized, llap
@@ -456,7 +456,7 @@ STAGE PLANS:
native: true
projectedOutputColumns: []
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Execution mode: vectorized, llap
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 18e16cf..b9d0f06 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1329,7 +1329,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
index 81dfac6..f2bc0a5 100644
--- a/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_empty_where.q.out
@@ -109,7 +109,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -264,7 +264,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -427,7 +427,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -590,7 +590,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out
index 1c67983..a4ef2e7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out
@@ -100,9 +100,11 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
+ keyColumns: [0, 1, 4]
keyExpressions: ConstantVectorExpression(val 1) -> 4:long
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: [3]
Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Execution mode: vectorized, llap
@@ -265,8 +267,10 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
+ keyColumns: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: [3]
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Execution mode: vectorized, llap
http://git-wip-us.apache.org/repos/asf/hive/blob/6bfa2491/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 7bfbd6f..e412844 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -140,7 +140,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
@@ -245,7 +245,7 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ className: VectorReduceSinkEmptyKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE