Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/06 01:05:48 UTC

svn commit: r1616043 [1/7] - in /hive/trunk: itests/qtest/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ph...

Author: hashutosh
Date: Tue Aug  5 23:05:47 2014
New Revision: 1616043

URL: http://svn.apache.org/r1616043
Log:
HIVE-7029 : Vectorize ReduceWork (Matt McCline via Jitendra Nath Pandey)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_12.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_13.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_14.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_9.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
Modified:
    hive/trunk/itests/qtest/testconfiguration.properties
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_15.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_14.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_15.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_16.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorization_9.q.out

Modified: hive/trunk/itests/qtest/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/qtest/testconfiguration.properties?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/itests/qtest/testconfiguration.properties (original)
+++ hive/trunk/itests/qtest/testconfiguration.properties Tue Aug  5 23:05:47 2014
@@ -1,5 +1,5 @@
 minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q
 minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
 minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q
-minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q
+minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,vector_decimal_aggregate.q,vector_left_outer_join.q,vectorization_12.q,vectorization_13.q,vectorization_14.q,vectorization_9.q,vectorization_part_project.q,vectorization_short_regress.q,vectorized_mapjoin.q,vectorized_nested_mapjoin.q,vectorized_shufflejoin.q,vectorized_timestamp_funcs.q
 beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Tue Aug  5 23:05:47 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -113,6 +114,7 @@ public final class OperatorFactory {
     vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
     vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
     vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
+    vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
   }
 
   private static final class OpTuple<T extends OperatorDesc> {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Tue Aug  5 23:05:47 2014
@@ -136,7 +136,7 @@ public class ReduceRecordProcessor  exte
     reducer.setParentOperators(null); // clear out any parents as reducer is the
     // root
     isTagged = redWork.getNeedsTagging();
-    vectorized = redWork.getVectorModeOn() != null;
+    vectorized = redWork.getVectorMode();
 
     try {
       keyTableDesc = redWork.getKeyDesc();

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java Tue Aug  5 23:05:47 2014
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExtractDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * Vectorized extract operator implementation.  Consumes vectorized row batches
+ * and forwards them with the key columns projected away, leaving the value columns.
+ **/
+public class VectorExtractOperator extends ExtractOperator {
+  private static final long serialVersionUID = 1L;
+
+  private int keyColCount;
+  private int valueColCount;
+  
+  private transient int [] projectedColumns = null;
+
+  public VectorExtractOperator(VectorizationContext vContext, OperatorDesc conf)
+      throws HiveException {
+    this();
+    this.conf = (ExtractDesc) conf;
+  }
+
+  public VectorExtractOperator() {
+    super();
+  }
+
+  private StructObjectInspector makeStandardStructObjectInspector(StructObjectInspector structObjectInspector) {
+    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+    ArrayList<String> colNames = new ArrayList<String>();
+    for (StructField field: fields) {
+      colNames.add(field.getFieldName());
+      ois.add(field.getFieldObjectInspector());
+    }
+    return ObjectInspectorFactory
+              .getStandardStructObjectInspector(colNames, ois);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    outputObjInspector = inputObjInspectors[0];
+    LOG.info("VectorExtractOperator class of outputObjInspector is " + outputObjInspector.getClass().getName());
+    projectedColumns = new int [valueColCount];
+    for (int i = 0; i < valueColCount; i++) {
+      projectedColumns[i] = keyColCount + i;
+    }
+    initializeChildren(hconf);
+  }
+
+  public void setKeyAndValueColCounts(int keyColCount, int valueColCount) {
+      this.keyColCount = keyColCount;
+      this.valueColCount = valueColCount;
+  }
+  
+  // Evaluate vectorized batches of rows and forward them.
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    VectorizedRowBatch vrg = (VectorizedRowBatch) row;
+
+    // Project away the key columns...
+    int[] originalProjections = vrg.projectedColumns;
+    int originalProjectionSize = vrg.projectionSize;
+    vrg.projectionSize = valueColCount;
+    vrg.projectedColumns = this.projectedColumns;
+
+    forward(vrg, outputObjInspector);
+
+    // Revert the projected columns back, because vrg will be re-used.
+    vrg.projectionSize = originalProjectionSize;
+    vrg.projectedColumns = originalProjections;
+  }
+}
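
For illustration, a minimal, self-contained sketch of the save/swap/restore projection pattern that processOp uses above. The Batch class is only a stand-in for VectorizedRowBatch (not Hive API), and forwarding is simulated by printing the active projection:

import java.util.Arrays;

// Stand-in for VectorizedRowBatch; only the projection fields matter here.
class Batch {
  int[] projectedColumns;
  int projectionSize;
}

public class ProjectionSwapSketch {

  // Forward only the value columns [keyColCount, keyColCount + valueColCount),
  // then restore the original projection because the batch object is re-used.
  static void forwardValuesOnly(Batch batch, int keyColCount, int valueColCount) {
    int[] valueProjection = new int[valueColCount];
    for (int i = 0; i < valueColCount; i++) {
      valueProjection[i] = keyColCount + i;
    }
    int[] savedColumns = batch.projectedColumns;
    int savedSize = batch.projectionSize;
    batch.projectedColumns = valueProjection;
    batch.projectionSize = valueColCount;

    // A real operator would call forward(batch) here; print the projection instead.
    System.out.println(Arrays.toString(
        Arrays.copyOf(batch.projectedColumns, batch.projectionSize)));

    batch.projectedColumns = savedColumns;  // revert: the caller re-uses the batch
    batch.projectionSize = savedSize;
  }

  public static void main(String[] args) {
    Batch b = new Batch();
    b.projectedColumns = new int[] {0, 1, 2, 3, 4};
    b.projectionSize = 5;
    forwardValuesOnly(b, 2, 3);  // prints [2, 3, 4]
  }
}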

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Aug  5 23:05:47 2014
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -62,9 +63,12 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -107,6 +111,12 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
 import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.util.ReflectionUtils;
 
 public class Vectorizer implements PhysicalPlanResolver {
 
@@ -256,7 +266,15 @@ public class Vectorizer implements Physi
 
   class VectorizationDispatcher implements Dispatcher {
 
+    private PhysicalContext pctx;
+
+    private int keyColCount;
+    private int valueColCount;
+
     public VectorizationDispatcher(PhysicalContext pctx) {
+      this.pctx = pctx;
+      keyColCount = 0;
+      valueColCount = 0;
     }
 
     @Override
@@ -270,6 +288,9 @@ public class Vectorizer implements Physi
         for (BaseWork w: work.getAllWork()) {
           if (w instanceof MapWork) {
             convertMapWork((MapWork)w);
+          } else if (w instanceof ReduceWork) {
+            // We are only vectorizing Reduce under Tez.
+            convertReduceWork((ReduceWork)w);
           }
         }
       }
@@ -283,6 +304,13 @@ public class Vectorizer implements Physi
       }
     }
 
+    private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+      opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
+          + FileSinkOperator.getOperatorName()), np);
+      opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
+          + ReduceSinkOperator.getOperatorName()), np);
+    }
+
     private boolean validateMapWork(MapWork mapWork) throws SemanticException {
 
       // Validate the input format
@@ -297,11 +325,8 @@ public class Vectorizer implements Physi
         }
       }
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-      ValidationNodeProcessor vnp = new ValidationNodeProcessor();
-      opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
-          + FileSinkOperator.getOperatorName()), vnp);
-      opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
-          + ReduceSinkOperator.getOperatorName()), vnp);
+      MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor();
+      addMapWorkRules(opRules, vnp);
       Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
       GraphWalker ogw = new DefaultGraphWalker(disp);
      // iterate over the mapper operator tree
@@ -320,14 +345,11 @@ public class Vectorizer implements Physi
     }
 
     private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
-      LOG.info("Vectorizing task...");
+      LOG.info("Vectorizing MapWork...");
       mapWork.setVectorMode(true);
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-      VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mapWork);
-      opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" +
-          ReduceSinkOperator.getOperatorName()), vnp);
-      opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
-          + FileSinkOperator.getOperatorName()), vnp);
+      MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork);
+      addMapWorkRules(opRules, vnp);
       Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
       GraphWalker ogw = new PreOrderWalker(disp);
      // iterate over the mapper operator tree
@@ -348,9 +370,114 @@ public class Vectorizer implements Physi
 
       return;
     }
+
+    private void convertReduceWork(ReduceWork reduceWork) throws SemanticException {
+      boolean ret = validateReduceWork(reduceWork);
+      if (ret) {
+        vectorizeReduceWork(reduceWork);
+      }
+    }
+
+    private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+      try {
+        // Check key ObjectInspector.
+        ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
+        if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+          return false;
+        }
+        StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
+        keyColCount = keyStructObjectInspector.getAllStructFieldRefs().size();
+
+        // Tez doesn't use tagging...
+        if (reduceWork.getNeedsTagging()) {
+          return false;
+        }
+
+        // Check value ObjectInspector.
+        ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
+        if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+          return false;
+        }
+        StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+        valueColCount = valueStructObjectInspector.getAllStructFieldRefs().size();
+      } catch (Exception e) {
+        throw new SemanticException(e);
+      }
+      return true;
+    }
+
+    private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+      opRules.put(new RuleRegExp("R1", ExtractOperator.getOperatorName() + ".*"), np);
+      opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + ".*"), np);
+    }
+
+    private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+      // Validate input to ReduceWork.
+      if (!getOnlyStructObjectInspectors(reduceWork)) {
+        return false;
+      }
+      // Now check the reduce operator tree.
+      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+      ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor();
+      addReduceWorkRules(opRules, vnp);
+      Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+      GraphWalker ogw = new DefaultGraphWalker(disp);
+      // iterate over the reduce operator tree
+      ArrayList<Node> topNodes = new ArrayList<Node>();
+      topNodes.add(reduceWork.getReducer());
+      HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+      ogw.startWalking(topNodes, nodeOutput);
+      for (Node n : nodeOutput.keySet()) {
+        if (nodeOutput.get(n) != null) {
+          if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+            return false;
+          }
+        }
+      }
+      return true;
+    }
+
+    private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException {
+      LOG.info("Vectorizing ReduceWork...");
+      reduceWork.setVectorMode(true);
+
+      // For some reason, the DefaultGraphWalker does not descend from the reducer Operator as expected.
+      // We need to descend, otherwise the algorithm that determines each operator's VectorizationContext breaks...
+      // So we use PreOrderWalker instead of DefaultGraphWalker.
+      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+      ReduceWorkVectorizationNodeProcessor vnp = new ReduceWorkVectorizationNodeProcessor(reduceWork, keyColCount, valueColCount);
+      addReduceWorkRules(opRules, vnp);
+      Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+      GraphWalker ogw = new PreOrderWalker(disp);
+      // iterate over the reduce operator tree
+      ArrayList<Node> topNodes = new ArrayList<Node>();
+      topNodes.add(reduceWork.getReducer());
+      LOG.info("vectorizeReduceWork reducer Operator: " + reduceWork.getReducer().getName() + "...");
+      HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+      ogw.startWalking(topNodes, nodeOutput);
+
+      // Necessary since we are vectorizing the root operator in reduce.
+      reduceWork.setReducer(vnp.getRootVectorOp());
+
+      Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
+      if (reducer.getType().equals(OperatorType.EXTRACT)) {
+        ((VectorExtractOperator)reducer).setKeyAndValueColCounts(keyColCount, valueColCount);
+      }
+
+      Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
+      reduceWork.setScratchColumnVectorTypes(columnVectorTypes);
+      Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+      reduceWork.setScratchColumnMap(columnMap);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
+        LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+      }
+    }
   }
 
-  class ValidationNodeProcessor implements NodeProcessor {
+  class MapWorkValidationNodeProcessor implements NodeProcessor {
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
@@ -361,9 +488,9 @@ public class Vectorizer implements Physi
             op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
           return new Boolean(true);
         }
-        boolean ret = validateOperator(op);
+        boolean ret = validateMapWorkOperator(op);
         if (!ret) {
-          LOG.info("Operator: " + op.getName() + " could not be vectorized.");
+          LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
           return new Boolean(false);
         }
       }
@@ -371,24 +498,37 @@ public class Vectorizer implements Physi
     }
   }
 
-  class VectorizationNodeProcessor implements NodeProcessor {
+  class ReduceWorkValidationNodeProcessor implements NodeProcessor {
 
-    private final MapWork mWork;
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      for (Node n : stack) {
+        Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+        boolean ret = validateReduceWorkOperator(op);
+        if (!ret) {
+          LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
+          return new Boolean(false);
+        }
+      }
+      return new Boolean(true);
+    }
+  }
+
+  // This class has common code used by both MapWorkVectorizationNodeProcessor and
+  // ReduceWorkVectorizationNodeProcessor.
+  class VectorizationNodeProcessor implements NodeProcessor {
 
     // This is used to extract scratch column types for each file key
-    private final Map<String, VectorizationContext> scratchColumnContext =
+    protected final Map<String, VectorizationContext> scratchColumnContext =
         new HashMap<String, VectorizationContext>();
 
-    private final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
+    protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
         new HashMap<Operator<? extends OperatorDesc>, VectorizationContext>();
 
-    private final Set<Operator<? extends OperatorDesc>> opsDone =
+    protected final Set<Operator<? extends OperatorDesc>> opsDone =
         new HashSet<Operator<? extends OperatorDesc>>();
 
-    public VectorizationNodeProcessor(MapWork mWork) {
-      this.mWork = mWork;
-    }
-
     public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
       Map<String, Map<Integer, String>> scratchColumnVectorTypes =
           new HashMap<String, Map<Integer, String>>();
@@ -411,16 +551,90 @@ public class Vectorizer implements Physi
       return scratchColumnMap;
     }
 
+    public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack, Operator<? extends OperatorDesc> op)
+            throws SemanticException {
+      VectorizationContext vContext = null;
+      if (stack.size() <= 1) {
+        throw new SemanticException(String.format("Expected operator stack for operator %s to have at least 2 operators", op.getName()));
+      }
+      // Walk down the stack of operators until we find one willing to give us a context.
+      // At the bottom will be the root operator, guaranteed to have a context
+      int i= stack.size()-2;
+      while (vContext == null) {
+        if (i < 0) {
+          throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+        }
+        Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
+        vContext = vContextsByTSOp.get(opParent);
+        --i;
+      }
+      return vContext;
+    }
+
+    public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
+      Operator<? extends OperatorDesc> currentOp = op;
+      while (currentOp.getParentOperators().size() > 0) {
+        currentOp = currentOp.getParentOperators().get(0);
+        if (currentOp.getType().equals(OperatorType.GROUPBY)) {
+          // No need to vectorize
+          if (!opsDone.contains(op)) {
+            opsDone.add(op);
+          }
+          return true;
+        }
+      }
+      return false;
+    }
+
+    public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op, VectorizationContext vContext)
+            throws SemanticException {
+      Operator<? extends OperatorDesc> vectorOp = op;
+      try {
+        if (!opsDone.contains(op)) {
+          vectorOp = vectorizeOperator(op, vContext);
+          opsDone.add(op);
+          if (vectorOp != op) {
+            opsDone.add(vectorOp);
+          }
+          if (vectorOp instanceof VectorizationContextRegion) {
+            VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+            VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+            vContextsByTSOp.put(op, vOutContext);
+            scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
+          }
+        }
+      } catch (HiveException e) {
+        throw new SemanticException(e);
+      }
+      return vectorOp;
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      throw new SemanticException("Must be overridden");
+    }
+  }
+  
+  class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+    private final MapWork mWork;
+
+    public MapWorkVectorizationNodeProcessor(MapWork mWork) {
+      this.mWork = mWork;
+    }
+
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      LOG.info("MapWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
 
       VectorizationContext vContext = null;
 
       if (op instanceof TableScanOperator) {
-        vContext = getVectorizationContext((TableScanOperator) op, physicalContext);
+        vContext = getVectorizationContext(op, physicalContext);
         for (String onefile : mWork.getPathToAliases().keySet()) {
           List<String> aliases = mWork.getPathToAliases().get(onefile);
           for (String alias : aliases) {
@@ -438,45 +652,76 @@ public class Vectorizer implements Physi
         }
         vContextsByTSOp.put(op, vContext);
       } else {
-        assert stack.size() > 1;
-        // Walk down the stack of operators until we found one willing to give us a context.
-        // At the bottom will be the TS operator, guaranteed to have a context
-        int i= stack.size()-2;
-        while (vContext == null) {
-          Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
-          vContext = vContextsByTSOp.get(opParent);
-          --i;
-        }
+        vContext = walkStackToFindVectorizationContext(stack, op);
       }
 
       assert vContext != null;
 
-      if ((op.getType().equals(OperatorType.REDUCESINK) || op.getType().equals(OperatorType.FILESINK)) &&
-          op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-          opsDone.add(op);
-        }
+      // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches.  So, don't vectorize
+      // any operators below GROUPBY.
+      if (nonVectorizableChildOfGroupBy(op)) {
+        return null;
+      }
+
+      doVectorize(op, vContext);
+
+      return null;
+    }
+  }
+
+  class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+    private final ReduceWork rWork;
+    private int keyColCount;
+    private int valueColCount;
+    private Map<String, Integer> reduceColumnNameMap;
+
+    private Operator<? extends OperatorDesc> rootVectorOp;
+
+    public Operator<? extends OperatorDesc> getRootVectorOp() {
+      return rootVectorOp;
+    }
+
+    public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, int valueColCount) {
+      this.rWork = rWork;
+      reduceColumnNameMap = rWork.getReduceColumnNameMap();
+      this.keyColCount = keyColCount;
+      this.valueColCount = valueColCount;
+      rootVectorOp = null;
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      LOG.info("ReduceWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
+
+      VectorizationContext vContext = null;
+
+      boolean saveRootVectorOp = false;
+
+      if (op.getParentOperators().size() == 0) {
+        vContext = getReduceVectorizationContext(reduceColumnNameMap);
+        vContextsByTSOp.put(op, vContext);
+        saveRootVectorOp = true;
       } else {
-        try {
-          if (!opsDone.contains(op)) {
-            Operator<? extends OperatorDesc> vectorOp =
-                vectorizeOperator(op, vContext);
-            opsDone.add(op);
-            if (vectorOp != op) {
-              opsDone.add(vectorOp);
-            }
-            if (vectorOp instanceof VectorizationContextRegion) {
-              VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
-              VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
-              vContextsByTSOp.put(op, vOutContext);
-              scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
-            }
-          }
-        } catch (HiveException e) {
-          throw new SemanticException(e);
-        }
+        vContext = walkStackToFindVectorizationContext(stack, op);
+      }
+
+      assert vContext != null;
+
+      // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches.  So, don't vectorize
+      // any operators below GROUPBY.
+      if (nonVectorizableChildOfGroupBy(op)) {
+        return null;
       }
+
+      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+      if (saveRootVectorOp && op != vectorOp) {
+        rootVectorOp = vectorOp;
+      }
+
       return null;
     }
   }
@@ -519,7 +764,7 @@ public class Vectorizer implements Physi
     return pctx;
   }
 
-  boolean validateOperator(Operator<? extends OperatorDesc> op) {
+  boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op) {
     boolean ret = false;
     switch (op.getType()) {
       case MAPJOIN:
@@ -555,6 +800,32 @@ public class Vectorizer implements Physi
     return ret;
   }
 
+  boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
+    boolean ret = false;
+    switch (op.getType()) {
+      case EXTRACT:
+        ret = validateExtractOperator((ExtractOperator) op);
+        break;
+      case FILTER:
+        ret = validateFilterOperator((FilterOperator) op);
+        break;
+      case SELECT:
+        ret = validateSelectOperator((SelectOperator) op);
+        break;
+      case REDUCESINK:
+        ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+        break;
+      case FILESINK:
+      case LIMIT:
+        ret = true;
+        break;
+      default:
+        ret = false;
+        break;
+    }
+    return ret;
+  }
+
   private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
     SMBJoinDesc desc = op.getConf();
     // Validation is the same as for map join, since the 'small' tables are not vectorized
@@ -617,6 +888,15 @@ public class Vectorizer implements Physi
     return validateAggregationDesc(op.getConf().getAggregators());
   }
 
+  private boolean validateExtractOperator(ExtractOperator op) {
+    ExprNodeDesc expr = op.getConf().getCol();
+    return validateExprNodeDesc(expr);
+  }
+
   private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
     return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
   }
@@ -728,7 +1008,7 @@ public class Vectorizer implements Physi
     return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
   }
 
-  private VectorizationContext getVectorizationContext(TableScanOperator op,
+  private VectorizationContext getVectorizationContext(Operator op,
       PhysicalContext pctx) {
     RowSchema rs = op.getSchema();
 
@@ -741,8 +1021,26 @@ public class Vectorizer implements Physi
       }
     }
 
-    VectorizationContext vc =  new VectorizationContext(cmap, columnCount);
-    return vc;
+    return new VectorizationContext(cmap, columnCount);
+  }
+
+  private VectorizationContext getReduceVectorizationContext(Map<String, Integer> reduceColumnNameMap) {
+    return new VectorizationContext(reduceColumnNameMap, reduceColumnNameMap.size());
+  }
+
+  private void fixupParentChildOperators(Operator<? extends OperatorDesc> op, Operator<? extends OperatorDesc> vectorOp) {
+    if (op.getParentOperators() != null) {
+      vectorOp.setParentOperators(op.getParentOperators());
+      for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
+        p.replaceChild(op, vectorOp);
+      }
+    }
+    if (op.getChildOperators() != null) {
+      vectorOp.setChildOperators(op.getChildOperators());
+      for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
+        c.replaceParent(op, vectorOp);
+      }
+    }
   }
 
   Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
@@ -757,6 +1055,7 @@ public class Vectorizer implements Physi
       case FILESINK:
       case REDUCESINK:
       case LIMIT:
+      case EXTRACT:
         vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
         break;
       default:
@@ -765,18 +1064,7 @@ public class Vectorizer implements Physi
     }
 
     if (vectorOp != op) {
-      if (op.getParentOperators() != null) {
-        vectorOp.setParentOperators(op.getParentOperators());
-        for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
-          p.replaceChild(op, vectorOp);
-        }
-      }
-      if (op.getChildOperators() != null) {
-        vectorOp.setChildOperators(op.getChildOperators());
-        for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
-          c.replaceParent(op, vectorOp);
-        }
-      }
+      fixupParentChildOperators(op, vectorOp);
       ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
     }
     return vectorOp;
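
For reference, a standalone sketch of the stack-walk logic in walkStackToFindVectorizationContext above, with plain strings standing in for the Hive operator and context classes (the names and the shuffle context string are illustrative only):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class StackWalkSketch {

  static String findContext(List<String> stack, Map<String, String> contexts) {
    // The element at stack.size() - 1 is the current operator itself, so start
    // one position below it and walk toward the root operator at index 0.
    for (int i = stack.size() - 2; i >= 0; i--) {
      String ctx = contexts.get(stack.get(i));
      if (ctx != null) {
        return ctx;
      }
    }
    throw new IllegalStateException("no vectorization context found on stack");
  }

  public static void main(String[] args) {
    List<String> stack = Arrays.asList("EX", "SEL", "FS");  // root ... current
    Map<String, String> contexts = new HashMap<String, String>();
    contexts.put("EX", "reduce-shuffle context");  // only the root has one here
    System.out.println(findContext(stack, contexts));  // prints the root's context
  }
}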

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Tue Aug  5 23:05:47 2014
@@ -52,6 +52,11 @@ public abstract class BaseWork extends A
 
   private String name;
 
+  // Vectorization.
+  protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+  protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+  protected boolean vectorMode = false;
+
   public void setGatheringStats(boolean gatherStats) {
     this.gatheringStats = gatherStats;
   }
@@ -107,5 +112,31 @@ public abstract class BaseWork extends A
     return returnSet;
   }
 
+  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+    return scratchColumnVectorTypes;
+  }
+
+  public void setScratchColumnVectorTypes(
+      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+  }
+
+  public Map<String, Map<String, Integer>> getScratchColumnMap() {
+    return scratchColumnMap;
+  }
+
+  public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
+    this.scratchColumnMap = scratchColumnMap;
+  }
+
+  @Override
+  public void setVectorMode(boolean vectorMode) {
+    this.vectorMode = vectorMode;
+  }
+
+  public boolean getVectorMode() {
+    return vectorMode;
+  }
+
   public abstract void configureJobConf(JobConf job);
 }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Tue Aug  5 23:05:47 2014
@@ -116,10 +116,6 @@ public class MapWork extends BaseWork {
 
   private boolean useOneNullRowInputFormat;
 
-  private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
-  private Map<String, Map<String, Integer>> scratchColumnMap = null;
-  private boolean vectorMode = false;
-
   public MapWork() {}
 
   public MapWork(String name) {
@@ -519,32 +515,6 @@ public class MapWork extends BaseWork {
     }
   }
 
-  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
-    return scratchColumnVectorTypes;
-  }
-
-  public void setScratchColumnVectorTypes(
-      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
-    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
-  }
-
-  public Map<String, Map<String, Integer>> getScratchColumnMap() {
-    return scratchColumnMap;
-  }
-
-  public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
-    this.scratchColumnMap = scratchColumnMap;
-  }
-
-  public boolean getVectorMode() {
-    return vectorMode;
-  }
-
-  @Override
-  public void setVectorMode(boolean vectorMode) {
-    this.vectorMode = vectorMode;
-  }
-
   public void logPathToAliases() {
     if (LOG.isDebugEnabled()) {
       LOG.debug("LOGGING PATH TO ALIASES");

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Tue Aug  5 23:05:47 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -30,7 +31,18 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * ReduceWork represents all the information used to run a reduce task on the cluster.
@@ -84,6 +96,11 @@ public class ReduceWork extends BaseWork
   // for auto reduce parallelism - max reducers requested
   private int maxReduceTasks;
 
+  private ObjectInspector keyObjectInspector = null;
+  private ObjectInspector valueObjectInspector = null;
+
+  private Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
+
   /**
    * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
    * to keySerializeInfo of the ReduceSink
@@ -95,7 +112,90 @@ public class ReduceWork extends BaseWork
   }
 
   public TableDesc getKeyDesc() {
-    return keyDesc;
+     return keyDesc;
+  }
+
+  private ObjectInspector getObjectInspector(TableDesc desc) {
+    ObjectInspector objectInspector;
+    try {
+      Deserializer deserializer = (SerDe) ReflectionUtils.newInstance(desc
+                .getDeserializerClass(), null);
+      SerDeUtils.initializeSerDe(deserializer, null, desc.getProperties(), null);
+      objectInspector = deserializer.getObjectInspector();
+    } catch (Exception e) {
+      return null;
+    }
+    return objectInspector;
+  }
+
+  public ObjectInspector getKeyObjectInspector() {
+    if (keyObjectInspector == null) {
+      keyObjectInspector = getObjectInspector(keyDesc);
+    }
+    return keyObjectInspector;
+  }
+
+  // Only works when not tagging.
+  public ObjectInspector getValueObjectInspector() {
+    if (needsTagging) {
+      return null;
+    }
+    if (valueObjectInspector == null) {
+      valueObjectInspector = getObjectInspector(tagToValueDesc.get(0));
+    }
+    return valueObjectInspector;
+  }
+
+  private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
+    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+    int index = startIndex;
+    for (StructField field: fields) {
+      reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
+      index++;
+    }
+    return index;
+  }
+
+  public Boolean fillInReduceColumnNameMap() {
+    ObjectInspector keyObjectInspector = getKeyObjectInspector();
+    if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+      return false;
+    }
+    StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector;
+
+    ObjectInspector valueObjectInspector = getValueObjectInspector();
+    if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+      return false;
+    }
+    StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector;
+
+    int keyCount = addToReduceColumnNameMap(keyStructObjectInspector, 0, Utilities.ReduceField.KEY.toString());
+    addToReduceColumnNameMap(valueStructObjectInspector, keyCount, Utilities.ReduceField.VALUE.toString());
+    return true;
+  }
+
+  public Map<String, Integer> getReduceColumnNameMap() {
+    if (needsTagging) {
+      return null;
+    }
+    if (reduceColumnNameMap.size() == 0) {
+      if (!fillInReduceColumnNameMap()) {
+        return null;
+      }
+    }
+    return reduceColumnNameMap;
+  }
+
+  public List<String> getReduceColumnNames() {
+    if (needsTagging) {
+      return null;
+    }
+    if (reduceColumnNameMap.size() == 0) {
+      if (!fillInReduceColumnNameMap()) {
+        return null;
+      }
+    }
+    return new ArrayList<String>(reduceColumnNameMap.keySet());
   }
 
   public List<TableDesc> getTagToValueDesc() {
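
For reference, a standalone sketch of the map layout fillInReduceColumnNameMap produces above: key fields first under the KEY prefix, then value fields under VALUE, sharing one contiguous index space. The field names in main are illustrative only:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ReduceColumnMapSketch {

  static Map<String, Integer> buildMap(List<String> keyFields, List<String> valueFields) {
    Map<String, Integer> map = new LinkedHashMap<String, Integer>();
    int index = 0;
    for (String field : keyFields) {
      map.put("KEY." + field, index++);    // key columns occupy [0, keyCount)
    }
    for (String field : valueFields) {
      map.put("VALUE." + field, index++);  // value columns follow contiguously
    }
    return map;
  }

  public static void main(String[] args) {
    System.out.println(buildMap(Arrays.asList("reducesinkkey0"),
                                Arrays.asList("_col0", "_col1")));
    // prints {KEY.reducesinkkey0=0, VALUE._col0=1, VALUE._col1=2}
  }
}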

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Tue Aug  5 23:05:47 2014
@@ -107,7 +107,7 @@ public class TestVectorizer {
     gbyOp.setConf(desc);
 
     Vectorizer v = new Vectorizer();
-    Assert.assertTrue(v.validateOperator(gbyOp));
+    Assert.assertTrue(v.validateMapWorkOperator(gbyOp));
     VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext);
     Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass());
     VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0];
@@ -187,7 +187,7 @@ public class TestVectorizer {
     mop.setConf(mjdesc);
  
     Vectorizer vectorizer = new Vectorizer();
-    Assert.assertTrue(vectorizer.validateOperator(mop));
+    Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
   }
 
   
@@ -203,6 +203,6 @@ public class TestVectorizer {
       mop.setConf(mjdesc);
     
       Vectorizer vectorizer = new Vectorizer();
-      Assert.assertTrue(vectorizer.validateOperator(mop)); 
+      Assert.assertTrue(vectorizer.validateMapWorkOperator(mop)); 
   }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q Tue Aug  5 23:05:47 2014
@@ -1,4 +1,38 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   ctimestamp1,
+         cfloat,
+         cstring1,
+         cboolean1,
+         cdouble,
+         (-26.28 + cdouble),
+         (-((-26.28 + cdouble))),
+         STDDEV_SAMP((-((-26.28 + cdouble)))),
+         (cfloat * -26.28),
+         MAX(cfloat),
+         (-(cfloat)),
+         (-(MAX(cfloat))),
+         ((-((-26.28 + cdouble))) / 10.175),
+         STDDEV_POP(cfloat),
+         COUNT(cfloat),
+         (-(((-((-26.28 + cdouble))) / 10.175))),
+         (-1.389 % STDDEV_SAMP((-((-26.28 + cdouble))))),
+         (cfloat - cdouble),
+         VAR_POP(cfloat),
+         (VAR_POP(cfloat) % 10.175),
+         VAR_SAMP(cfloat),
+         (-((cfloat - cdouble)))
+FROM     alltypesorc
+WHERE    (((ctinyint <= cbigint)
+           AND ((cint <= cdouble)
+                OR (ctimestamp2 < ctimestamp1)))
+          AND ((cdouble < ctinyint)
+              AND ((cbigint > -257)
+                  OR (cfloat < cint))))
+GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+ORDER BY cstring1, cfloat, cdouble, ctimestamp1;
+
 SELECT   ctimestamp1,
          cfloat,
          cstring1,

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q Tue Aug  5 23:05:47 2014
@@ -1,4 +1,36 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cfloat,
+         cboolean1,
+         cdouble,
+         cstring1,
+         ctinyint,
+         cint,
+         ctimestamp1,
+         STDDEV_SAMP(cfloat),
+         (-26.28 - cint),
+         MIN(cdouble),
+         (cdouble * 79.553),
+         (33 % cfloat),
+         STDDEV_SAMP(ctinyint),
+         VAR_POP(ctinyint),
+         (-23 % cdouble),
+         (-(ctinyint)),
+         VAR_SAMP(cint),
+         (cint - cfloat),
+         (-23 % ctinyint),
+         (-((-26.28 - cint))),
+         STDDEV_POP(cint)
+FROM     alltypesorc
+WHERE    (((cstring2 LIKE '%ss%')
+           OR (cstring1 LIKE '10%'))
+          OR ((cint >= -75)
+              AND ((ctinyint = csmallint)
+                   AND (cdouble >= -3728))))
+GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1;
+
 SELECT   cfloat,
          cboolean1,
          cdouble,

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q Tue Aug  5 23:05:47 2014
@@ -1,4 +1,25 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cstring1,
+         cdouble,
+         ctimestamp1,
+         (cdouble - 9763215.5639),
+         (-((cdouble - 9763215.5639))),
+         COUNT(cdouble),
+         STDDEV_SAMP(cdouble),
+         (-(STDDEV_SAMP(cdouble))),
+         (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+         MIN(cdouble),
+         (9763215.5639 / cdouble),
+         (COUNT(cdouble) / -1.389),
+         STDDEV_SAMP(cdouble)
+FROM     alltypesorc
+WHERE    ((cstring2 LIKE '%b%')
+          AND ((cdouble >= -1.389)
+              OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
 SELECT   cstring1,
          cdouble,
          ctimestamp1,

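Note that COUNT(cdouble) in the query above counts only non-NULL values, which matters
here because alltypesorc contains NULLs (visible as the NULL group key in the decimal
results further down); a minimal sketch contrasting it with COUNT(*), assuming the
same table:

    SET hive.vectorized.execution.enabled=true;
    SELECT COUNT(cdouble), COUNT(*)
    FROM alltypesorc
    WHERE cstring2 LIKE '%b%';
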
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q Tue Aug  5 23:05:47 2014
@@ -1,4 +1,25 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cstring1,
+         cdouble,
+         ctimestamp1,
+         (cdouble - 9763215.5639),
+         (-((cdouble - 9763215.5639))),
+         COUNT(cdouble),
+         STDDEV_SAMP(cdouble),
+         (-(STDDEV_SAMP(cdouble))),
+         (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+         MIN(cdouble),
+         (9763215.5639 / cdouble),
+         (COUNT(cdouble) / -1.389),
+         STDDEV_SAMP(cdouble)
+FROM     alltypesorc
+WHERE    ((cstring2 LIKE '%b%')
+          AND ((cdouble >= -1.389)
+              OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
 SELECT   cfloat,
          cstring1,
          cint,

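A quick cross-check for any of these expected outputs is to re-run the same statement
with vectorization switched off and compare the rows; a minimal sketch using the query
added above:

    SET hive.vectorized.execution.enabled=false;
    SELECT cstring1, cdouble, ctimestamp1, COUNT(cdouble), MIN(cdouble)
    FROM alltypesorc
    WHERE (cstring2 LIKE '%b%') AND ((cdouble >= -1.389) OR (cstring1 < 'a'))
    GROUP BY cstring1, cdouble, ctimestamp1;
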
Added: hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out Tue Aug  5 23:05:47 2014
@@ -0,0 +1,117 @@
+PREHOOK: query: CREATE TABLE decimal_vgby STORED AS ORC AS 
+	SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, 
+	CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+	cint
+	FROM alltypesorc
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+POSTHOOK: query: CREATE TABLE decimal_vgby STORED AS ORC AS 
+	SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, 
+	CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+	cint
+	FROM alltypesorc
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@decimal_vgby
+PREHOOK: query: EXPLAIN SELECT cint,
+	COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+	COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+	FROM decimal_vgby
+	GROUP BY cint
+	HAVING COUNT(*) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint,
+	COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+	COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+	FROM decimal_vgby
+	GROUP BY cint
+	HAVING COUNT(*) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: decimal_vgby
+                  Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
+                    outputColumnNames: cint, cdecimal1, cdecimal2
+                    Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+                      keys: cint (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                      Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: struct<count:bigint,sum:double,variance:double>), _col15 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col15 > 1) (type: boolean)
+                  Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                    Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cint,
+	COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+	COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+	FROM decimal_vgby
+	GROUP BY cint
+	HAVING COUNT(*) > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_vgby
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint,
+	COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+	COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+	FROM decimal_vgby
+	GROUP BY cint
+	HAVING COUNT(*) > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_vgby
+#### A masked pattern was here ####
+NULL	3072	9318.4351351351	-4298.1513513514	5018444.1081079808	1633.60810810806667	5695.483082135364	5696.4103077145055	3072	11160.715384615385	-5147.907692307693	6010604.3076923073536	1956.576923076922966667	6821.495748565159	6822.606289190924
+-3728	6	5831542.269248378	-3367.6517567568	5817556.0411483778	969592.67352472963333	2174330.2092403853	2381859.406131774	6	6984454.211097692	-4033.445769230769	6967702.8672438458471	1161283.811207307641183333	2604201.2704476737	2852759.5602156054
+-563	2	-515.621072973	-3367.6517567568	-3883.2728297298	-1941.6364148649	1426.0153418918999	2016.6902366556308	2	-617.5607769230769	-4033.445769230769	-4651.0065461538459	-2325.50327307692295	1707.9424961538462	2415.395441814127
+762	2	5831542.269248378	1531.2194054054	5833073.4886537834	2916536.7443268917	2915005.5249214866	4122440.3477364695	2	6984454.211097692	1833.9456923076925	6986288.1567899996925	3493144.07839499984625	3491310.1327026924	4937458.140118758
+6981	3	5831542.269248378	-515.621072973	5830511.027102432	1943503.67570081066667	2749258.455012492	3367140.1929065133	3	6984454.211097692	-617.5607769230769	6983219.0895438458462	2327739.696514615282066667	3292794.4113115156	4032833.0678006653
+253665376	1024	9767.0054054054	-9779.5486486487	-347484.0818378374	-339.33992366976309	5708.9563478862	5711.745967572779	1024	11697.969230769231	-11712.99230769231	-416182.64030769233089	-406.428359675480791885	6837.632716002934	6840.973851172274
+528534767	1024	5831542.269248378	-9777.1594594595	11646372.8607481068	11373.41099682432305	257528.92988206653	257654.7686043977	1024	6984454.211097692	-11710.130769230771	13948892.79980307629003	13621.965624807691689482	308443.1074570801	308593.82484083984
+626923679	1024	9723.4027027027	-9778.9513513514	10541.0525297287	10.29399661106318	5742.09145323734	5744.897264034267	1024	11645.746153846154	-11712.276923076923	12625.04759999997746	12.329148046874977988	6877.318722794877	6880.679250101603

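In the plan above, the map side runs vectorized and aggregates in hash mode, while
Reducer 2 merges the partial aggregates and applies the HAVING filter on the count; a
minimal sketch exercising the same path on the generated table, assuming decimal_vgby
from the CTAS above:

    SET hive.vectorized.execution.enabled=true;
    SELECT cint, SUM(cdecimal1), AVG(cdecimal2)
    FROM decimal_vgby
    GROUP BY cint
    HAVING COUNT(*) > 1;
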
Added: hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out Tue Aug  5 23:05:47 2014
@@ -0,0 +1,136 @@
+PREHOOK: query: explain 
+select count(*) from (select c.ctinyint 
+from alltypesorc c
+left outer join alltypesorc cd
+  on cd.cint = c.cint 
+left outer join alltypesorc hd
+  on hd.ctinyint = c.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select count(*) from (select c.ctinyint 
+from alltypesorc c
+left outer join alltypesorc cd
+  on cd.cint = c.cint 
+left outer join alltypesorc hd
+  on hd.ctinyint = c.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: hd
+                  Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: ctinyint (type: tinyint)
+                    sort order: +
+                    Map-reduce partition columns: ctinyint (type: tinyint)
+                    Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Left Outer Join0 to 1
+                    condition expressions:
+                      0 {ctinyint}
+                      1 
+                    keys:
+                      0 cint (type: int)
+                      1 cint (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      condition expressions:
+                        0 
+                        1 
+                      keys:
+                        0 _col0 (type: tinyint)
+                        1 ctinyint (type: tinyint)
+                      Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: cd
+                  Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: cint (type: int)
+                    sort order: +
+                    Map-reduce partition columns: cint (type: int)
+                    Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+  on cd.cint = c.cint 
+left outer join alltypesorc hd
+  on hd.ctinyint = c.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+  on cd.cint = c.cint 
+left outer join alltypesorc hd
+  on hd.ctinyint = c.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+225951785
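
The plan above folds both left outer joins into broadcast map joins inside Map 2, with
Map 1 and Map 4 feeding the small-table sides over BROADCAST_EDGEs; a minimal sketch
of the settings that drive this conversion, assuming Hive defaults otherwise:

    SET hive.vectorized.execution.enabled=true;
    SET hive.auto.convert.join=true;
    SELECT COUNT(*)
    FROM alltypesorc c
    LEFT OUTER JOIN alltypesorc cd ON cd.cint = c.cint;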