You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by li...@apache.org on 2017/07/24 03:04:32 UTC
hive git commit: HIVE-17114: HoS: Possible skew in shuffling when data is not really skewed (Rui reviewed by Chao)

Repository: hive
Updated Branches:
  refs/heads/master 637fba222 -> 9a8533148


HIVE-17114: HoS: Possible skew in shuffling when data is not really skewed (Rui reviewed by Chao)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9a853314
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9a853314
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9a853314

Branch: refs/heads/master
Commit: 9a8533148694332615cfa5d8ddb159491cd9b92c
Parents: 637fba2
Author: Rui Li <li...@apache.org>
Authored: Mon Jul 24 11:04:26 2017 +0800
Committer: Rui Li <li...@apache.org>
Committed: Mon Jul 24 11:04:26 2017 +0800

----------------------------------------------------------------------
 .../spark/SetSparkReducerParallelism.java       | 114 ++++++++++---------
 .../queries/clientpositive/constprog_semijoin.q |   2 +
 .../queries/clientpositive/ppd_join_filter.q    |   2 +
 .../results/clientpositive/spark/groupby3.q.out |   2 +-
 .../clientpositive/spark/union_remove_10.q.out  |   2 +-
 .../clientpositive/spark/union_remove_13.q.out  |   2 +-
 .../clientpositive/spark/union_remove_15.q.out  |   2 +-
 .../clientpositive/spark/union_remove_16.q.out  |   2 +-
 .../clientpositive/spark/union_remove_7.q.out   |   2 +-
 .../clientpositive/spark/union_remove_8.q.out   |   2 +-
 .../clientpositive/spark/union_remove_9.q.out   |   2 +-
 .../spark/vector_between_in.q.out               |   8 +-
 .../spark/vector_cast_constant.q.out            |   2 +-
 .../spark/vector_count_distinct.q.out           |   2 +-
 .../spark/vector_decimal_aggregate.q.out        |   4 +-
 .../spark/vector_distinct_2.q.out               |   2 +-
 .../clientpositive/spark/vector_groupby_3.q.out |   2 +-
 .../spark/vector_mapjoin_reduce.q.out           |   4 +-
 .../clientpositive/spark/vector_orderby_5.q.out |   2 +-
 .../spark/vector_string_concat.q.out            |   2 +-
 .../clientpositive/spark/vectorization_12.q.out |   3 +-
 .../clientpositive/spark/vectorization_13.q.out |   5 +-
 .../clientpositive/spark/vectorization_14.q.out |   3 +-
 .../clientpositive/spark/vectorization_15.q.out |   3 +-
 .../clientpositive/spark/vectorization_16.q.out |   3 +-
 .../clientpositive/spark/vectorization_9.q.out  |   3 +-
 .../spark/vectorization_short_regress.q.out     |   8 +-
 .../clientpositive/spark/vectorized_ptf.q.out   |  33 ++----
 .../spark/vectorized_shufflejoin.q.out          |   4 +-
 29 files changed, 113 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
index e808a4f..1999747 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SetSparkReducerParallelism.java
@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.hive.ql.optimizer.spark;
 
+import java.util.Collection;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.ObjectPair;
@@ -50,6 +53,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 
+import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
+
 /**
  * SetSparkReducerParallelism determines how many reducers should
  * be run for a given reduce sink, clone from SetReducerParallelism.
@@ -120,41 +125,64 @@ public class SetSparkReducerParallelism implements NodeProcessor {
           }
         }
 
-        long numberOfBytes = 0;
-
-        if (useOpStats) {
-          // we need to add up all the estimates from the siblings of this reduce sink
-          for (Operator<? extends OperatorDesc> sibling
-              : sink.getChildOperators().get(0).getParentOperators()) {
-            if (sibling.getStatistics() != null) {
-              numberOfBytes = StatsUtils.safeAdd(numberOfBytes, sibling.getStatistics().getDataSize());
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics());
-              }
-            } else {
-              LOG.warn("No stats available from: " + sibling);
-            }
-          }
-        } else if (parentSinks.isEmpty()) {
-          // Not using OP stats and this is the first sink in the path, meaning that
-          // we should use TS stats to infer parallelism
-          for (Operator<? extends OperatorDesc> sibling
-              : sink.getChildOperators().get(0).getParentOperators()) {
-            Set<TableScanOperator> sources =
-                OperatorUtils.findOperatorsUpstream(sibling, TableScanOperator.class);
-            for (TableScanOperator source : sources) {
-              if (source.getStatistics() != null) {
-                numberOfBytes = StatsUtils.safeAdd(numberOfBytes, source.getStatistics().getDataSize());
+        if (useOpStats || parentSinks.isEmpty()) {
+          long numberOfBytes = 0;
+          if (useOpStats) {
+            // we need to add up all the estimates from the siblings of this reduce sink
+            for (Operator<? extends OperatorDesc> sibling
+                : sink.getChildOperators().get(0).getParentOperators()) {
+              if (sibling.getStatistics() != null) {
+                numberOfBytes = StatsUtils.safeAdd(numberOfBytes, sibling.getStatistics().getDataSize());
                 if (LOG.isDebugEnabled()) {
-                  LOG.debug("Table source " + source + " has stats: " + source.getStatistics());
+                  LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics());
                 }
               } else {
-                LOG.warn("No stats available from table source: " + source);
+                LOG.warn("No stats available from: " + sibling);
               }
             }
+          } else {
+            // Not using OP stats and this is the first sink in the path, meaning that
+            // we should use TS stats to infer parallelism
+            for (Operator<? extends OperatorDesc> sibling
+                : sink.getChildOperators().get(0).getParentOperators()) {
+              Set<TableScanOperator> sources =
+                  OperatorUtils.findOperatorsUpstream(sibling, TableScanOperator.class);
+              for (TableScanOperator source : sources) {
+                if (source.getStatistics() != null) {
+                  numberOfBytes = StatsUtils.safeAdd(numberOfBytes, source.getStatistics().getDataSize());
+                  if (LOG.isDebugEnabled()) {
+                    LOG.debug("Table source " + source + " has stats: " + source.getStatistics());
+                  }
+                } else {
+                  LOG.warn("No stats available from table source: " + source);
+                }
+              }
+            }
+            LOG.debug("Gathered stats for sink " + sink + ". Total size is "
+                + numberOfBytes + " bytes.");
+          }
+
+          // Divide it by 2 so that we can have more reducers
+          long bytesPerReducer = context.getConf().getLongVar(HiveConf.ConfVars.BYTESPERREDUCER) / 2;
+          int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer,
+              maxReducers, false);
+
+          getSparkMemoryAndCores(context);
+          if (sparkMemoryAndCores != null &&
+              sparkMemoryAndCores.getFirst() > 0 && sparkMemoryAndCores.getSecond() > 0) {
+            // warn the user if bytes per reducer is much larger than memory per task
+            if ((double) sparkMemoryAndCores.getFirst() / bytesPerReducer < 0.5) {
+              LOG.warn("Average load of a reducer is much larger than its available memory. " +
+                  "Consider decreasing hive.exec.reducers.bytes.per.reducer");
+            }
+
+            // If there are more cores, use the number of cores
+            numReducers = Math.max(numReducers, sparkMemoryAndCores.getSecond());
           }
-          LOG.debug("Gathered stats for sink " + sink + ". Total size is "
-              + numberOfBytes + " bytes.");
+          numReducers = Math.min(numReducers, maxReducers);
+          LOG.info("Set parallelism for reduce sink " + sink + " to: " + numReducers +
+              " (calculated)");
+          desc.setNumReducers(numReducers);
         } else {
           // Use the maximum parallelism from all parent reduce sinks
           int numberOfReducers = 0;
@@ -164,30 +192,14 @@ public class SetSparkReducerParallelism implements NodeProcessor {
           desc.setNumReducers(numberOfReducers);
           LOG.debug("Set parallelism for sink " + sink + " to " + numberOfReducers
               + " based on its parents");
-          return false;
         }
-
-        // Divide it by 2 so that we can have more reducers
-        long bytesPerReducer = context.getConf().getLongVar(HiveConf.ConfVars.BYTESPERREDUCER) / 2;
-        int numReducers = Utilities.estimateReducers(numberOfBytes, bytesPerReducer,
-            maxReducers, false);
-
-        getSparkMemoryAndCores(context);
-        if (sparkMemoryAndCores != null &&
-            sparkMemoryAndCores.getFirst() > 0 && sparkMemoryAndCores.getSecond() > 0) {
-          // warn the user if bytes per reducer is much larger than memory per task
-          if ((double) sparkMemoryAndCores.getFirst() / bytesPerReducer < 0.5) {
-            LOG.warn("Average load of a reducer is much larger than its available memory. " +
-                "Consider decreasing hive.exec.reducers.bytes.per.reducer");
-          }
-
-          // If there are more cores, use the number of cores
-          numReducers = Math.max(numReducers, sparkMemoryAndCores.getSecond());
+        final Collection<ExprNodeDesc.ExprNodeDescEqualityWrapper> keyCols =
+            ExprNodeDesc.ExprNodeDescEqualityWrapper.transform(desc.getKeyCols());
+        final Collection<ExprNodeDesc.ExprNodeDescEqualityWrapper> partCols =
+            ExprNodeDesc.ExprNodeDescEqualityWrapper.transform(desc.getPartitionCols());
+        if (keyCols != null && keyCols.equals(partCols)) {
+          desc.setReducerTraits(EnumSet.of(UNIFORM));
         }
-        numReducers = Math.min(numReducers, maxReducers);
-        LOG.info("Set parallelism for reduce sink " + sink + " to: " + numReducers +
-            " (calculated)");
-        desc.setNumReducers(numReducers);
       }
     } else {
       LOG.info("Number of reducers for sink " + sink + " was already determined to be: " + desc.getNumReducers());

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/queries/clientpositive/constprog_semijoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/constprog_semijoin.q b/ql/src/test/queries/clientpositive/constprog_semijoin.q
index f282c27..a5546ec 100644
--- a/ql/src/test/queries/clientpositive/constprog_semijoin.q
+++ b/ql/src/test/queries/clientpositive/constprog_semijoin.q
@@ -1,6 +1,8 @@
 set hive.optimize.constant.propagation=true;
 set hive.explain.user=true;
 
+-- SORT_QUERY_RESULTS
+
 create table table1 (id int, val string, val1 string, dimid int);
 insert into table1 (id, val, val1, dimid) values (1, 't1val01', 'val101', 100), (2, 't1val02', 'val102', 200), (3, 't1val03', 'val103', 103), (3, 't1val01', 'val104', 100), (2, 't1val05', 'val105', 200), (3, 't1val01', 'val106', 103), (1, 't1val07', 'val107', 200), (2, 't1val01', 'val108', 200), (3, 't1val09', 'val109', 103), (4,'t1val01', 'val110', 200);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/queries/clientpositive/ppd_join_filter.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd_join_filter.q b/ql/src/test/queries/clientpositive/ppd_join_filter.q
index 418f4c5..e909b3d 100644
--- a/ql/src/test/queries/clientpositive/ppd_join_filter.q
+++ b/ql/src/test/queries/clientpositive/ppd_join_filter.q
@@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.ppd=true;
 set hive.ppd.remove.duplicatefilters=false;
 
+-- SORT_QUERY_RESULTS
+
 explain extended select a.key, b.k2, b.k3
 from src a
 join (

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/groupby3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out
index 23871ba..a34e89e1 100644
--- a/ql/src/test/results/clientpositive/spark/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out
@@ -148,4 +148,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
-130091.0	260.182	256.10355987055016	98.0	0.0	142.9268095075238	143.06995106518906	20428.072876000002	20469.010897795593
+130091.0	260.182	256.10355987055016	98.0	0.0	142.92680950752379	143.06995106518903	20428.07287599999	20469.010897795582

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
index 7ae8479..18438d2 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
@@ -232,7 +232,7 @@ Retention:          	0
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
 	numFiles            	4                   
-	totalSize           	350                 
+	totalSize           	348                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_13.q.out b/ql/src/test/results/clientpositive/spark/union_remove_13.q.out
index 0d0d841..e83f927 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_13.q.out
@@ -246,7 +246,7 @@ Retention:          	0
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
 	numFiles            	3                   
-	totalSize           	271                 
+	totalSize           	269                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
index 96ee309..320d869 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_15.q.out
@@ -178,7 +178,7 @@ Table Parameters:
 	numPartitions       	2                   
 	numRows             	0                   
 	rawDataSize         	0                   
-	totalSize           	336                 
+	totalSize           	332                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
index 8ea2b00..04b668b 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
@@ -220,7 +220,7 @@ Table Parameters:
 	numPartitions       	2                   
 	numRows             	0                   
 	rawDataSize         	0                   
-	totalSize           	336                 
+	totalSize           	332                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
index 7341d98..0fabbac 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_7.q.out
@@ -157,7 +157,7 @@ Retention:          	0
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
 	numFiles            	4                   
-	totalSize           	336                 
+	totalSize           	332                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
index 06ba030..c96f591 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_8.q.out
@@ -182,7 +182,7 @@ Retention:          	0
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
 	numFiles            	4                   
-	totalSize           	350                 
+	totalSize           	348                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
index 183169b..550b2ad 100644
--- a/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
@@ -240,7 +240,7 @@ Retention:          	0
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
 	numFiles            	4                   
-	totalSize           	350                 
+	totalSize           	348                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index 976d9d9..a752256 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -1114,7 +1114,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
@@ -1254,7 +1254,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
@@ -1394,7 +1394,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
@@ -1534,7 +1534,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
index c69bc81..f64a6af 100644
--- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
@@ -161,7 +161,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
index 9af0786..81b0e15 100644
--- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
@@ -1281,7 +1281,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index 9994f2b..9ec5a09 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -85,7 +85,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
@@ -247,7 +247,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
index aff53a6..3d13156 100644
--- a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
@@ -156,7 +156,7 @@ STAGE PLANS:
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkMultiKeyOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
index 83f8604..1761205 100644
--- a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
@@ -158,7 +158,7 @@ STAGE PLANS:
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkMultiKeyOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
index 558a2d0..e4f9162 100644
--- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out
@@ -159,7 +159,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
@@ -436,7 +436,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
index dc394c8..6af68f7 100644
--- a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out
@@ -159,7 +159,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: boolean)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
index 17c79a5..92634d7 100644
--- a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out
@@ -369,7 +369,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkStringOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
index c17043b..a707ac9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
@@ -121,11 +121,10 @@ STAGE PLANS:
                           sort order: ++++
                           Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2, 3]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2, 3]
                               valueColumns: [4, 5, 6, 7, 8]
                           Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col4 (type: bigint), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,input:double>), _col7 (type: bigint), _col8 (type: struct<count:bigint,sum:double,variance:double>)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index bae304b..6ecd0b5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -123,11 +123,10 @@ STAGE PLANS:
                           sort order: +++++
                           Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2, 3, 4]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2, 3, 4]
                               valueColumns: [5, 6, 7, 8, 9, 10]
                           Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
@@ -477,7 +476,7 @@ STAGE PLANS:
                           sort order: +++++
                           Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 9d52abe..15926be 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -124,11 +124,10 @@ STAGE PLANS:
                           sort order: +++++
                           Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2, 3, 4]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2, 3, 4]
                               valueColumns: [5, 6, 7, 8, 9, 10]
                           Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: float), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index cc9ae1d..9a78ee5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -119,11 +119,10 @@ STAGE PLANS:
                           sort order: +++++++
                           Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2, 3, 4, 5, 6]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2, 3, 4, 5, 6]
                               valueColumns: [7, 8, 9, 10, 11, 12]
                           Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: double), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: struct<count:bigint,sum:double,variance:double>), _col12 (type: struct<count:bigint,sum:double,variance:double>)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index d5235aa..eb65dc3 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -96,11 +96,10 @@ STAGE PLANS:
                           sort order: +++
                           Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2]
                               valueColumns: [3, 4, 5]
                           Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index d5235aa..eb65dc3 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -96,11 +96,10 @@ STAGE PLANS:
                           sort order: +++
                           Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               keyColumns: [0, 1, 2]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              partitionColumns: [0, 1, 2]
                               valueColumns: [3, 4, 5]
                           Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 4d3e41a..bfb2e4c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -2209,7 +2209,7 @@ STAGE PLANS:
                           sort order: +
                           Map-reduce partition columns: _col0 (type: smallint)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkLongOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
@@ -2487,7 +2487,7 @@ STAGE PLANS:
                           sort order: +
                           Map-reduce partition columns: _col0 (type: double)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
@@ -2809,7 +2809,7 @@ STAGE PLANS:
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -3212,7 +3212,7 @@ STAGE PLANS:
                           sort order: +
                           Map-reduce partition columns: _col0 (type: boolean)
                           Reduce Sink Vectorization:
-                              className: VectorReduceSinkObjectHashOperator
+                              className: VectorReduceSinkLongOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 1a155b6..b231d2c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -368,11 +368,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: p_partkey (type: int)
                       Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
+                          className: VectorReduceSinkLongOperator
                           keyColumns: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          partitionColumns: [0]
                           valueColumns: [1, 2, 5]
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                       value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
@@ -410,11 +409,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: p_partkey (type: int)
                       Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
+                          className: VectorReduceSinkLongOperator
                           keyColumns: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          partitionColumns: [0]
                           valueColumns: []
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
@@ -1460,11 +1458,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: p_partkey (type: int)
                       Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
+                          className: VectorReduceSinkLongOperator
                           keyColumns: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          partitionColumns: [0]
                           valueColumns: []
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
@@ -1638,11 +1635,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: p_partkey (type: int)
                       Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
+                          className: VectorReduceSinkLongOperator
                           keyColumns: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          partitionColumns: [0]
                           valueColumns: []
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
@@ -2996,11 +2992,10 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: p_partkey (type: int)
                       Reduce Sink Vectorization:
-                          className: VectorReduceSinkObjectHashOperator
+                          className: VectorReduceSinkLongOperator
                           keyColumns: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          partitionColumns: [0]
                           valueColumns: []
                       Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
@@ -4184,11 +4179,10 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: p_mfgr (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkStringOperator
                         keyColumns: [2]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2]
                         valueColumns: [1, 5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_name (type: string), p_size (type: int)
@@ -4496,11 +4490,10 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: p_mfgr (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkStringOperator
                         keyColumns: [2]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2]
                         valueColumns: [1, 5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_name (type: string), p_size (type: int)
@@ -4803,11 +4796,10 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkMultiKeyOperator
                         keyColumns: [2, 1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2, 1]
                         valueColumns: [5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_size (type: int)
@@ -5121,11 +5113,10 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkMultiKeyOperator
                         keyColumns: [2, 1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2, 1]
                         valueColumns: [5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_size (type: int)
@@ -5447,11 +5438,10 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkMultiKeyOperator
                         keyColumns: [2, 1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2, 1]
                         valueColumns: [5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_size (type: int)
@@ -5743,11 +5733,10 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                     Reduce Sink Vectorization:
-                        className: VectorReduceSinkObjectHashOperator
+                        className: VectorReduceSinkMultiKeyOperator
                         keyColumns: [2, 1]
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        partitionColumns: [2, 1]
                         valueColumns: [5]
                     Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                     value expressions: p_size (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/9a853314/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
index 18c7db1..4d8620f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
@@ -92,7 +92,7 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
+                            className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE