Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/06 01:05:48 UTC
svn commit: r1616043 [1/7] - in /hive/trunk: itests/qtest/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ph...
Author: hashutosh
Date: Tue Aug 5 23:05:47 2014
New Revision: 1616043
URL: http://svn.apache.org/r1616043
Log:
HIVE-7029 : Vectorize ReduceWork (Matt McCline via Jitendra Nath Pandey)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_12.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_13.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_14.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_9.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_part_project.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_short_regress.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_mapjoin.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_shufflejoin.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
Modified:
hive/trunk/itests/qtest/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q
hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q
hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q
hive/trunk/ql/src/test/results/clientpositive/tez/vectorization_15.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_14.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_15.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_16.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorization_9.q.out
Modified: hive/trunk/itests/qtest/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/qtest/testconfiguration.properties?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/itests/qtest/testconfiguration.properties (original)
+++ hive/trunk/itests/qtest/testconfiguration.properties Tue Aug 5 23:05:47 2014
@@ -1,5 +1,5 @@
minimr.query.files=stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q,udf_using.q,empty_dir_in_table.q,temp_table_external.q
minimr.query.negative.files=cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q,udf_local_resource.q
minitez.query.files=tez_fsstat.q,mapjoin_decimal.q,tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q,tez_union.q,bucket_map_join_tez1.q,bucket_map_join_tez2.q,tez_schema_evolution.q,tez_join_hash.q
-minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q
+minitez.query.files.shared=orc_merge1.q,orc_merge2.q,orc_merge3.q,orc_merge4.q,alter_merge_orc.q,alter_merge_2_orc.q,alter_merge_stats_orc.q,cross_product_check_1.q,cross_product_check_2.q,dynpart_sort_opt_vectorization.q,dynpart_sort_optimization.q,orc_analyze.q,join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q,union2.q,union3.q,union4.q,union5.q,union6.q,union7.q,union8.q,union9.q,transform1.q,transform2.q,transform_ppr1.q,transform_ppr2.q,script_env_var1.q,script_env_var2.q,script_pipe.q,scriptfile1.q,metadataonly1.q,temp_table.q,vectorized_ptf.q,optimize_nullscan.q,vector_cast_constant.q,vector_string_concat.q,vector_decimal_aggregate.q,vector_left_outer_join.q,vectorization_12.q,vectorization_13.q,vectorization_14.q,vectorization_9.q,vectorization_part_project.q,vectorization_short_regress.q,vectorized_mapjoin.q,vectorized_nested_mapjoin.q,vectorized_shufflejoin.q,vectorized_timestamp_funcs.q
beeline.positive.exclude=add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Tue Aug 5 23:05:47 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -113,6 +114,7 @@ public final class OperatorFactory {
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
+ vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
}
private static final class OpTuple<T extends OperatorDesc> {
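For readers skimming the diff: vectorOpvec above is OperatorFactory's lookup table from an operator's descriptor class to the vectorized operator class that replaces it, and this commit adds one row so ExtractDesc now has a vectorized counterpart. A minimal standalone sketch of that registry pattern, using hypothetical simplified types rather than Hive's actual OpTuple/OperatorDesc classes:

import java.util.ArrayList;
import java.util.List;

// Hypothetical, simplified sketch of a descriptor -> vector-operator table.
final class VectorOpRegistry {
  private static final class Entry {
    final Class<?> descClass;   // descriptor class, e.g. an ExtractDesc-like type
    final Class<?> opClass;     // operator class that handles it
    Entry(Class<?> descClass, Class<?> opClass) {
      this.descClass = descClass;
      this.opClass = opClass;
    }
  }

  private final List<Entry> entries = new ArrayList<Entry>();

  void register(Class<?> descClass, Class<?> opClass) {
    entries.add(new Entry(descClass, opClass));
  }

  // Return the operator class registered for a descriptor instance,
  // or null if no vectorized counterpart exists.
  Class<?> lookup(Object desc) {
    for (Entry e : entries) {
      if (e.descClass.isInstance(desc)) {
        return e.opClass;
      }
    }
    return null;
  }
}

With the new registration in place, OperatorFactory.getVectorOperator can instantiate a VectorExtractOperator whenever it is handed an ExtractDesc.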
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Tue Aug 5 23:05:47 2014
@@ -136,7 +136,7 @@ public class ReduceRecordProcessor exte
reducer.setParentOperators(null); // clear out any parents as reducer is the
// root
isTagged = redWork.getNeedsTagging();
- vectorized = redWork.getVectorModeOn() != null;
+ vectorized = redWork.getVectorMode();
try {
keyTableDesc = redWork.getKeyDesc();
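The one-line change above switches ReduceRecordProcessor from probing a nullable marker (getVectorModeOn() != null) to reading the boolean flag the Vectorizer now sets through BaseWork.setVectorMode(true) (see the BaseWork hunk below). A tiny sketch of that flow, with hypothetical simplified classes:

// Hypothetical, simplified sketch of how the vectorMode flag travels
// from the planner to the reduce-side record processor.
class Work {
  private boolean vectorMode = false;
  public void setVectorMode(boolean vectorMode) { this.vectorMode = vectorMode; }
  public boolean getVectorMode() { return vectorMode; }
}

class RecordProcessor {
  private final boolean vectorized;
  RecordProcessor(Work work) {
    // Before this commit: vectorized = (work.getVectorModeOn() != null);
    // After: a plain boolean getter set by the Vectorizer.
    vectorized = work.getVectorMode();
  }
}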
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java Tue Aug 5 23:05:47 2014
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExtractDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * Vectorized extract operator implementation. Consumes rows and outputs a
+ * vectorized batch of subobjects.
+ **/
+public class VectorExtractOperator extends ExtractOperator {
+ private static final long serialVersionUID = 1L;
+
+ private int keyColCount;
+ private int valueColCount;
+
+ private transient int [] projectedColumns = null;
+
+ public VectorExtractOperator(VectorizationContext vContext, OperatorDesc conf)
+ throws HiveException {
+ this();
+ this.conf = (ExtractDesc) conf;
+ }
+
+ public VectorExtractOperator() {
+ super();
+ }
+
+ private StructObjectInspector makeStandardStructObjectInspector(StructObjectInspector structObjectInspector) {
+ List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+ ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+ ArrayList<String> colNames = new ArrayList<String>();
+ for (StructField field: fields) {
+ colNames.add(field.getFieldName());
+ ois.add(field.getFieldObjectInspector());
+ }
+ return ObjectInspectorFactory
+ .getStandardStructObjectInspector(colNames, ois);
+ }
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ outputObjInspector = inputObjInspectors[0];
+ LOG.info("VectorExtractOperator class of outputObjInspector is " + outputObjInspector.getClass().getName());
+ projectedColumns = new int [valueColCount];
+ for (int i = 0; i < valueColCount; i++) {
+ projectedColumns[i] = keyColCount + i;
+ }
+ initializeChildren(hconf);
+ }
+
+ public void setKeyAndValueColCounts(int keyColCount, int valueColCount) {
+ this.keyColCount = keyColCount;
+ this.valueColCount = valueColCount;
+ }
+
+ @Override
+ // Evaluate vectorized batches of rows and forward them.
+ public void processOp(Object row, int tag) throws HiveException {
+ VectorizedRowBatch vrg = (VectorizedRowBatch) row;
+
+ // Project away the key columns...
+ int[] originalProjections = vrg.projectedColumns;
+ int originalProjectionSize = vrg.projectionSize;
+ vrg.projectionSize = valueColCount;
+ vrg.projectedColumns = this.projectedColumns;
+
+ forward(vrg, outputObjInspector);
+
+ // Revert the projected columns back, because vrg will be re-used.
+ vrg.projectionSize = originalProjectionSize;
+ vrg.projectedColumns = originalProjections;
+ }
+}
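The processOp above relies on VectorizedRowBatch's projection being advisory: downstream operators read only the columns listed in projectedColumns, so the key columns can be hidden without copying any data, and the original projection must be put back because the same batch object is reused for the next forward. A standalone sketch of that save/swap/restore idiom, with a hypothetical Batch type standing in for VectorizedRowBatch (not code from this commit):

// Hypothetical stand-ins for VectorizedRowBatch's projection fields
// and Operator.forward().
class Batch {
  int[] projectedColumns;
  int projectionSize;
}

interface Forwarder {
  void forward(Batch batch);
}

class ProjectionSwapSketch {
  // Forward only the value columns [keyColCount, keyColCount + valueColCount),
  // then restore the original projection because the batch object is reused.
  static void forwardValuesOnly(Batch batch, int keyColCount, int valueColCount,
      Forwarder forwarder) {
    int[] savedColumns = batch.projectedColumns;
    int savedSize = batch.projectionSize;

    int[] valueColumns = new int[valueColCount];
    for (int i = 0; i < valueColCount; i++) {
      valueColumns[i] = keyColCount + i;  // value columns follow the key columns
    }
    batch.projectedColumns = valueColumns;
    batch.projectionSize = valueColCount;

    forwarder.forward(batch);

    // Restore so the caller can refill and re-forward the same batch.
    batch.projectedColumns = savedColumns;
    batch.projectionSize = savedSize;
  }
}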
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Aug 5 23:05:47 2014
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -62,9 +63,12 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
@@ -107,6 +111,12 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.*;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.util.ReflectionUtils;
public class Vectorizer implements PhysicalPlanResolver {
@@ -256,7 +266,15 @@ public class Vectorizer implements Physi
class VectorizationDispatcher implements Dispatcher {
+ private PhysicalContext pctx;
+
+ private int keyColCount;
+ private int valueColCount;
+
public VectorizationDispatcher(PhysicalContext pctx) {
+ this.pctx = pctx;
+ keyColCount = 0;
+ valueColCount = 0;
}
@Override
@@ -270,6 +288,9 @@ public class Vectorizer implements Physi
for (BaseWork w: work.getAllWork()) {
if (w instanceof MapWork) {
convertMapWork((MapWork)w);
+ } else if (w instanceof ReduceWork) {
+ // We are only vectorizing Reduce under Tez.
+ convertReduceWork((ReduceWork)w);
}
}
}
@@ -283,6 +304,13 @@ public class Vectorizer implements Physi
}
}
+ private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
+ + FileSinkOperator.getOperatorName()), np);
+ opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
+ + ReduceSinkOperator.getOperatorName()), np);
+ }
+
private boolean validateMapWork(MapWork mapWork) throws SemanticException {
// Validate the input format
@@ -297,11 +325,8 @@ public class Vectorizer implements Physi
}
}
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- ValidationNodeProcessor vnp = new ValidationNodeProcessor();
- opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*"
- + FileSinkOperator.getOperatorName()), vnp);
- opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
- + ReduceSinkOperator.getOperatorName()), vnp);
+ MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor();
+ addMapWorkRules(opRules, vnp);
Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
GraphWalker ogw = new DefaultGraphWalker(disp);
// iterate over the mapper operator tree
@@ -320,14 +345,11 @@ public class Vectorizer implements Physi
}
private void vectorizeMapWork(MapWork mapWork) throws SemanticException {
- LOG.info("Vectorizing task...");
+ LOG.info("Vectorizing MapWork...");
mapWork.setVectorMode(true);
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
- VectorizationNodeProcessor vnp = new VectorizationNodeProcessor(mapWork);
- opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" +
- ReduceSinkOperator.getOperatorName()), vnp);
- opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*"
- + FileSinkOperator.getOperatorName()), vnp);
+ MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork);
+ addMapWorkRules(opRules, vnp);
Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
GraphWalker ogw = new PreOrderWalker(disp);
// iterate over the mapper operator tree
@@ -348,9 +370,114 @@ public class Vectorizer implements Physi
return;
}
+
+ private void convertReduceWork(ReduceWork reduceWork) throws SemanticException {
+ boolean ret = validateReduceWork(reduceWork);
+ if (ret) {
+ vectorizeReduceWork(reduceWork);
+ }
+ }
+
+ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+ try {
+ // Check key ObjectInspector.
+ ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
+ if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector;
+ keyColCount = keyStructObjectInspector.getAllStructFieldRefs().size();
+
+ // Tez doesn't use tagging...
+ if (reduceWork.getNeedsTagging()) {
+ return false;
+ }
+
+ // Check value ObjectInspector.
+ ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector();
+ if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
+ valueColCount = valueStructObjectInspector.getAllStructFieldRefs().size();
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
+ return true;
+ }
+
+ private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
+ opRules.put(new RuleRegExp("R1", ExtractOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + ".*"), np);
+ }
+
+ private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+ // Validate input to ReduceWork.
+ if (!getOnlyStructObjectInspectors(reduceWork)) {
+ return false;
+ }
+ // Now check the reduce operator tree.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor();
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new DefaultGraphWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+ for (Node n : nodeOutput.keySet()) {
+ if (nodeOutput.get(n) != null) {
+ if (!((Boolean)nodeOutput.get(n)).booleanValue()) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException {
+ LOG.info("Vectorizing ReduceWork...");
+ reduceWork.setVectorMode(true);
+
+ // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as expected.
+ // We need to descend down, otherwise it breaks our algorithm that determines VectorizationContext...
+ // So we use PreOrderWalker instead of DefaultGraphWalker.
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ ReduceWorkVectorizationNodeProcessor vnp = new ReduceWorkVectorizationNodeProcessor(reduceWork, keyColCount, valueColCount);
+ addReduceWorkRules(opRules, vnp);
+ Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
+ GraphWalker ogw = new PreOrderWalker(disp);
+ // iterate over the reduce operator tree
+ ArrayList<Node> topNodes = new ArrayList<Node>();
+ topNodes.add(reduceWork.getReducer());
+ LOG.info("vectorizeReduceWork reducer Operator: " + reduceWork.getReducer().getName() + "...");
+ HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
+ ogw.startWalking(topNodes, nodeOutput);
+
+ // Necessary since we are vectorizing the root operator in reduce.
+ reduceWork.setReducer(vnp.getRootVectorOp());
+
+ Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
+ if (reducer.getType().equals(OperatorType.EXTRACT)) {
+ ((VectorExtractOperator)reducer).setKeyAndValueColCounts(keyColCount, valueColCount);
+ }
+
+ Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
+ reduceWork.setScratchColumnVectorTypes(columnVectorTypes);
+ Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+ reduceWork.setScratchColumnMap(columnMap);
+
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
+ LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+ }
+ }
}
- class ValidationNodeProcessor implements NodeProcessor {
+ class MapWorkValidationNodeProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
@@ -361,9 +488,9 @@ public class Vectorizer implements Physi
op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
return new Boolean(true);
}
- boolean ret = validateOperator(op);
+ boolean ret = validateMapWorkOperator(op);
if (!ret) {
- LOG.info("Operator: " + op.getName() + " could not be vectorized.");
+ LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
return new Boolean(false);
}
}
@@ -371,24 +498,37 @@ public class Vectorizer implements Physi
}
}
- class VectorizationNodeProcessor implements NodeProcessor {
+ class ReduceWorkValidationNodeProcessor implements NodeProcessor {
- private final MapWork mWork;
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ for (Node n : stack) {
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
+ boolean ret = validateReduceWorkOperator(op);
+ if (!ret) {
+ LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
+ return new Boolean(false);
+ }
+ }
+ return new Boolean(true);
+ }
+ }
+
+ // This class has common code used by both MapWorkVectorizationNodeProcessor and
+ // ReduceWorkVectorizationNodeProcessor.
+ class VectorizationNodeProcessor implements NodeProcessor {
// This is used to extract scratch column types for each file key
- private final Map<String, VectorizationContext> scratchColumnContext =
+ protected final Map<String, VectorizationContext> scratchColumnContext =
new HashMap<String, VectorizationContext>();
- private final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
+ protected final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
new HashMap<Operator<? extends OperatorDesc>, VectorizationContext>();
- private final Set<Operator<? extends OperatorDesc>> opsDone =
+ protected final Set<Operator<? extends OperatorDesc>> opsDone =
new HashSet<Operator<? extends OperatorDesc>>();
- public VectorizationNodeProcessor(MapWork mWork) {
- this.mWork = mWork;
- }
-
public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
Map<String, Map<Integer, String>> scratchColumnVectorTypes =
new HashMap<String, Map<Integer, String>>();
@@ -411,16 +551,90 @@ public class Vectorizer implements Physi
return scratchColumnMap;
}
+ public VectorizationContext walkStackToFindVectorizationContext(Stack<Node> stack, Operator<? extends OperatorDesc> op)
+ throws SemanticException {
+ VectorizationContext vContext = null;
+ if (stack.size() <= 1) {
+ throw new SemanticException(String.format("Expected operator stack for operator %s to have at least 2 operators", op.getName()));
+ }
+ // Walk down the stack of operators until we find one willing to give us a context.
+ // At the bottom will be the root operator, guaranteed to have a context
+ int i= stack.size()-2;
+ while (vContext == null) {
+ if (i < 0) {
+ throw new SemanticException(String.format("Did not find vectorization context for operator %s in operator stack", op.getName()));
+ }
+ Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
+ vContext = vContextsByTSOp.get(opParent);
+ --i;
+ }
+ return vContext;
+ }
+
+ public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
+ Operator<? extends OperatorDesc> currentOp = op;
+ while (currentOp.getParentOperators().size() > 0) {
+ currentOp = currentOp.getParentOperators().get(0);
+ if (currentOp.getType().equals(OperatorType.GROUPBY)) {
+ // No need to vectorize
+ if (!opsDone.contains(op)) {
+ opsDone.add(op);
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public Operator<? extends OperatorDesc> doVectorize(Operator<? extends OperatorDesc> op, VectorizationContext vContext)
+ throws SemanticException {
+ Operator<? extends OperatorDesc> vectorOp = op;
+ try {
+ if (!opsDone.contains(op)) {
+ vectorOp = vectorizeOperator(op, vContext);
+ opsDone.add(op);
+ if (vectorOp != op) {
+ opsDone.add(vectorOp);
+ }
+ if (vectorOp instanceof VectorizationContextRegion) {
+ VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+ VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
+ vContextsByTSOp.put(op, vOutContext);
+ scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
+ }
+ }
+ } catch (HiveException e) {
+ throw new SemanticException(e);
+ }
+ return vectorOp;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ throw new SemanticException("Must be overridden");
+ }
+ }
+
+ class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final MapWork mWork;
+
+ public MapWorkVectorizationNodeProcessor(MapWork mWork) {
+ this.mWork = mWork;
+ }
+
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+ LOG.info("MapWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
VectorizationContext vContext = null;
if (op instanceof TableScanOperator) {
- vContext = getVectorizationContext((TableScanOperator) op, physicalContext);
+ vContext = getVectorizationContext(op, physicalContext);
for (String onefile : mWork.getPathToAliases().keySet()) {
List<String> aliases = mWork.getPathToAliases().get(onefile);
for (String alias : aliases) {
@@ -438,45 +652,76 @@ public class Vectorizer implements Physi
}
vContextsByTSOp.put(op, vContext);
} else {
- assert stack.size() > 1;
- // Walk down the stack of operators until we found one willing to give us a context.
- // At the bottom will be the TS operator, guaranteed to have a context
- int i= stack.size()-2;
- while (vContext == null) {
- Operator<? extends OperatorDesc> opParent = (Operator<? extends OperatorDesc>) stack.get(i);
- vContext = vContextsByTSOp.get(opParent);
- --i;
- }
+ vContext = walkStackToFindVectorizationContext(stack, op);
}
assert vContext != null;
- if ((op.getType().equals(OperatorType.REDUCESINK) || op.getType().equals(OperatorType.FILESINK)) &&
- op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
- // No need to vectorize
- if (!opsDone.contains(op)) {
- opsDone.add(op);
- }
+ // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches. So, don't vectorize
+ // any operators below GROUPBY.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return null;
+ }
+
+ doVectorize(op, vContext);
+
+ return null;
+ }
+ }
+
+ class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
+
+ private final ReduceWork rWork;
+ private int keyColCount;
+ private int valueColCount;
+ private Map<String, Integer> reduceColumnNameMap;
+
+ private Operator<? extends OperatorDesc> rootVectorOp;
+
+ public Operator<? extends OperatorDesc> getRootVectorOp() {
+ return rootVectorOp;
+ }
+
+ public ReduceWorkVectorizationNodeProcessor(ReduceWork rWork, int keyColCount, int valueColCount) {
+ this.rWork = rWork;
+ reduceColumnNameMap = rWork.getReduceColumnNameMap();
+ this.keyColCount = keyColCount;
+ this.valueColCount = valueColCount;
+ rootVectorOp = null;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+
+ Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+ LOG.info("ReduceWorkVectorizationNodeProcessor processing Operator: " + op.getName() + "...");
+
+ VectorizationContext vContext = null;
+
+ boolean saveRootVectorOp = false;
+
+ if (op.getParentOperators().size() == 0) {
+ vContext = getReduceVectorizationContext(reduceColumnNameMap);
+ vContextsByTSOp.put(op, vContext);
+ saveRootVectorOp = true;
} else {
- try {
- if (!opsDone.contains(op)) {
- Operator<? extends OperatorDesc> vectorOp =
- vectorizeOperator(op, vContext);
- opsDone.add(op);
- if (vectorOp != op) {
- opsDone.add(vectorOp);
- }
- if (vectorOp instanceof VectorizationContextRegion) {
- VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
- VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
- vContextsByTSOp.put(op, vOutContext);
- scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
- }
- }
- } catch (HiveException e) {
- throw new SemanticException(e);
- }
+ vContext = walkStackToFindVectorizationContext(stack, op);
+ }
+
+ assert vContext != null;
+
+ // Currently, Vectorized GROUPBY outputs rows, not vectorized row batches. So, don't vectorize
+ // any operators below GROUPBY.
+ if (nonVectorizableChildOfGroupBy(op)) {
+ return null;
}
+
+ Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext);
+ if (saveRootVectorOp && op != vectorOp) {
+ rootVectorOp = vectorOp;
+ }
+
return null;
}
}
@@ -519,7 +764,7 @@ public class Vectorizer implements Physi
return pctx;
}
- boolean validateOperator(Operator<? extends OperatorDesc> op) {
+ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op) {
boolean ret = false;
switch (op.getType()) {
case MAPJOIN:
@@ -555,6 +800,32 @@ public class Vectorizer implements Physi
return ret;
}
+ boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
+ boolean ret = false;
+ switch (op.getType()) {
+ case EXTRACT:
+ ret = validateExtractOperator((ExtractOperator) op);
+ break;
+ case FILTER:
+ ret = validateFilterOperator((FilterOperator) op);
+ break;
+ case SELECT:
+ ret = validateSelectOperator((SelectOperator) op);
+ break;
+ case REDUCESINK:
+ ret = validateReduceSinkOperator((ReduceSinkOperator) op);
+ break;
+ case FILESINK:
+ case LIMIT:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ return ret;
+ }
+
private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
SMBJoinDesc desc = op.getConf();
// Validation is the same as for map join, since the 'small' tables are not vectorized
@@ -617,6 +888,15 @@ public class Vectorizer implements Physi
return validateAggregationDesc(op.getConf().getAggregators());
}
+ private boolean validateExtractOperator(ExtractOperator op) {
+ ExprNodeDesc expr = op.getConf().getCol();
+ boolean ret = validateExprNodeDesc(expr);
+ if (!ret) {
+ return false;
+ }
+ return true;
+ }
+
private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
}
@@ -728,7 +1008,7 @@ public class Vectorizer implements Physi
return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
}
- private VectorizationContext getVectorizationContext(TableScanOperator op,
+ private VectorizationContext getVectorizationContext(Operator op,
PhysicalContext pctx) {
RowSchema rs = op.getSchema();
@@ -741,8 +1021,26 @@ public class Vectorizer implements Physi
}
}
- VectorizationContext vc = new VectorizationContext(cmap, columnCount);
- return vc;
+ return new VectorizationContext(cmap, columnCount);
+ }
+
+ private VectorizationContext getReduceVectorizationContext(Map<String, Integer> reduceColumnNameMap) {
+ return new VectorizationContext(reduceColumnNameMap, reduceColumnNameMap.size());
+ }
+
+ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op, Operator<? extends OperatorDesc> vectorOp) {
+ if (op.getParentOperators() != null) {
+ vectorOp.setParentOperators(op.getParentOperators());
+ for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
+ p.replaceChild(op, vectorOp);
+ }
+ }
+ if (op.getChildOperators() != null) {
+ vectorOp.setChildOperators(op.getChildOperators());
+ for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
+ c.replaceParent(op, vectorOp);
+ }
+ }
}
Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
@@ -757,6 +1055,7 @@ public class Vectorizer implements Physi
case FILESINK:
case REDUCESINK:
case LIMIT:
+ case EXTRACT:
vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
break;
default:
@@ -765,18 +1064,7 @@ public class Vectorizer implements Physi
}
if (vectorOp != op) {
- if (op.getParentOperators() != null) {
- vectorOp.setParentOperators(op.getParentOperators());
- for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
- p.replaceChild(op, vectorOp);
- }
- }
- if (op.getChildOperators() != null) {
- vectorOp.setChildOperators(op.getChildOperators());
- for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
- c.replaceParent(op, vectorOp);
- }
- }
+ fixupParentChildOperators(op, vectorOp);
((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
}
return vectorOp;
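The heart of the refactoring above is walkStackToFindVectorizationContext, shared by the map-side and reduce-side node processors: each operator inherits the VectorizationContext of the nearest ancestor on the current walk stack that produced one (the root operator, or an operator such as vectorized GROUP BY that opens a new context region). A compact standalone sketch of that search, with hypothetical Op/Context types:

import java.util.HashMap;
import java.util.Map;
import java.util.Stack;

// Hypothetical, simplified sketch of the walk-the-stack context lookup.
class ContextStackWalkSketch {
  static class Op { final String name; Op(String name) { this.name = name; } }
  static class Context { }

  // Contexts recorded for operators that created one.
  final Map<Op, Context> contextsByOp = new HashMap<Op, Context>();

  // stack holds the path from the root operator (bottom) to the current
  // operator (top); scan from the parent downward until a context is found.
  Context findContext(Stack<Op> stack, Op current) {
    if (stack.size() <= 1) {
      throw new IllegalStateException(
          "Expected at least 2 operators on the stack for " + current.name);
    }
    for (int i = stack.size() - 2; i >= 0; i--) {
      Context context = contextsByOp.get(stack.get(i));
      if (context != null) {
        return context;
      }
    }
    throw new IllegalStateException(
        "No vectorization context found for " + current.name);
  }
}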
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Tue Aug 5 23:05:47 2014
@@ -52,6 +52,11 @@ public abstract class BaseWork extends A
private String name;
+ // Vectorization.
+ protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+ protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+ protected boolean vectorMode = false;
+
public void setGatheringStats(boolean gatherStats) {
this.gatheringStats = gatherStats;
}
@@ -107,5 +112,31 @@ public abstract class BaseWork extends A
return returnSet;
}
+ public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+ return scratchColumnVectorTypes;
+ }
+
+ public void setScratchColumnVectorTypes(
+ Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+ this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+ }
+
+ public Map<String, Map<String, Integer>> getScratchColumnMap() {
+ return scratchColumnMap;
+ }
+
+ public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
+ this.scratchColumnMap = scratchColumnMap;
+ }
+
+ @Override
+ public void setVectorMode(boolean vectorMode) {
+ this.vectorMode = vectorMode;
+ }
+
+ public boolean getVectorMode() {
+ return vectorMode;
+ }
+
public abstract void configureJobConf(JobConf job);
}
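The two maps hoisted from MapWork into BaseWork above (so ReduceWork can share them) are both keyed by a file key: scratchColumnVectorTypes maps a file key to (scratch column index -> vector column type name), and scratchColumnMap maps a file key to (column name -> column index). A tiny sketch of their shapes, with hypothetical keys and values:

import java.util.HashMap;
import java.util.Map;

// Hypothetical example of populating the two scratch-column maps.
class ScratchColumnMapsSketch {
  public static void main(String[] args) {
    // file key -> (scratch column index -> vector column type name)
    Map<String, Map<Integer, String>> scratchColumnVectorTypes =
        new HashMap<String, Map<Integer, String>>();
    // file key -> (column name -> column index)
    Map<String, Map<String, Integer>> scratchColumnMap =
        new HashMap<String, Map<String, Integer>>();

    Map<Integer, String> types = new HashMap<Integer, String>();
    types.put(3, "Long");  // hypothetical: scratch column 3 holds longs
    scratchColumnVectorTypes.put("file:/warehouse/t1", types);

    Map<String, Integer> columns = new HashMap<String, Integer>();
    columns.put("cdouble", 0);
    columns.put("cint", 1);
    scratchColumnMap.put("file:/warehouse/t1", columns);

    System.out.println(scratchColumnVectorTypes);
    System.out.println(scratchColumnMap);
  }
}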
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Tue Aug 5 23:05:47 2014
@@ -116,10 +116,6 @@ public class MapWork extends BaseWork {
private boolean useOneNullRowInputFormat;
- private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
- private Map<String, Map<String, Integer>> scratchColumnMap = null;
- private boolean vectorMode = false;
-
public MapWork() {}
public MapWork(String name) {
@@ -519,32 +515,6 @@ public class MapWork extends BaseWork {
}
}
- public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
- return scratchColumnVectorTypes;
- }
-
- public void setScratchColumnVectorTypes(
- Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
- this.scratchColumnVectorTypes = scratchColumnVectorTypes;
- }
-
- public Map<String, Map<String, Integer>> getScratchColumnMap() {
- return scratchColumnMap;
- }
-
- public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
- this.scratchColumnMap = scratchColumnMap;
- }
-
- public boolean getVectorMode() {
- return vectorMode;
- }
-
- @Override
- public void setVectorMode(boolean vectorMode) {
- this.vectorMode = vectorMode;
- }
-
public void logPathToAliases() {
if (LOG.isDebugEnabled()) {
LOG.debug("LOGGING PATH TO ALIASES");
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Tue Aug 5 23:05:47 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -30,7 +31,18 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
/**
* ReduceWork represents all the information used to run a reduce task on the cluster.
@@ -84,6 +96,11 @@ public class ReduceWork extends BaseWork
// for auto reduce parallelism - max reducers requested
private int maxReduceTasks;
+ private ObjectInspector keyObjectInspector = null;
+ private ObjectInspector valueObjectInspector = null;
+
+ private Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
+
/**
* If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
* to keySerializeInfo of the ReduceSink
@@ -95,7 +112,90 @@ public class ReduceWork extends BaseWork
}
public TableDesc getKeyDesc() {
- return keyDesc;
+ return keyDesc;
+ }
+
+ private ObjectInspector getObjectInspector(TableDesc desc) {
+ ObjectInspector objectInspector;
+ try {
+ Deserializer deserializer = (SerDe) ReflectionUtils.newInstance(desc
+ .getDeserializerClass(), null);
+ SerDeUtils.initializeSerDe(deserializer, null, desc.getProperties(), null);
+ objectInspector = deserializer.getObjectInspector();
+ } catch (Exception e) {
+ return null;
+ }
+ return objectInspector;
+ }
+
+ public ObjectInspector getKeyObjectInspector() {
+ if (keyObjectInspector == null) {
+ keyObjectInspector = getObjectInspector(keyDesc);
+ }
+ return keyObjectInspector;
+ }
+
+ // Only works when not tagging.
+ public ObjectInspector getValueObjectInspector() {
+ if (needsTagging) {
+ return null;
+ }
+ if (valueObjectInspector == null) {
+ valueObjectInspector = getObjectInspector(tagToValueDesc.get(0));
+ }
+ return valueObjectInspector;
+ }
+
+ private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
+ List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+ int index = startIndex;
+ for (StructField field: fields) {
+ reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
+ index++;
+ }
+ return index;
+ }
+
+ public Boolean fillInReduceColumnNameMap() {
+ ObjectInspector keyObjectInspector = getKeyObjectInspector();
+ if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector;
+
+ ObjectInspector valueObjectInspector = getValueObjectInspector();
+ if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+ return false;
+ }
+ StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector;
+
+ int keyCount = addToReduceColumnNameMap(keyStructObjectInspector, 0, Utilities.ReduceField.KEY.toString());
+ addToReduceColumnNameMap(valueStructObjectInspector, keyCount, Utilities.ReduceField.VALUE.toString());
+ return true;
+ }
+
+ public Map<String, Integer> getReduceColumnNameMap() {
+ if (needsTagging) {
+ return null;
+ }
+ if (reduceColumnNameMap.size() == 0) {
+ if (!fillInReduceColumnNameMap()) {
+ return null;
+ }
+ }
+ return reduceColumnNameMap;
+ }
+
+ public List<String> getReduceColumnNames() {
+ if (needsTagging) {
+ return null;
+ }
+ if (reduceColumnNameMap.size() == 0) {
+ if (!fillInReduceColumnNameMap()) {
+ return null;
+ }
+ }
+ return new ArrayList<String>(reduceColumnNameMap.keySet());
}
public List<TableDesc> getTagToValueDesc() {
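fillInReduceColumnNameMap above flattens the reduce-shuffle schema into one ordered name -> column-index map: key fields first, prefixed with KEY., then value fields, prefixed with VALUE. and numbered after the keys. This is the map that getReduceVectorizationContext consumes in the Vectorizer hunk above. A small sketch of the resulting layout, using hypothetical field name lists in place of ObjectInspectors:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch mirroring addToReduceColumnNameMap's indexing.
class ReduceColumnNameMapSketch {
  // Append "<prefix>.<field>" -> index entries starting at startIndex
  // and return the next free index.
  static int add(Map<String, Integer> map, List<String> fields,
      int startIndex, String prefix) {
    int index = startIndex;
    for (String field : fields) {
      map.put(prefix + "." + field, index++);
    }
    return index;
  }

  public static void main(String[] args) {
    Map<String, Integer> map = new LinkedHashMap<String, Integer>();
    int keyCount = add(map, Arrays.asList("reducesinkkey0"), 0, "KEY");
    add(map, Arrays.asList("cstring1", "cdouble"), keyCount, "VALUE");
    // Prints {KEY.reducesinkkey0=0, VALUE.cstring1=1, VALUE.cdouble=2}
    System.out.println(map);
  }
}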
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Tue Aug 5 23:05:47 2014
@@ -107,7 +107,7 @@ public class TestVectorizer {
gbyOp.setConf(desc);
Vectorizer v = new Vectorizer();
- Assert.assertTrue(v.validateOperator(gbyOp));
+ Assert.assertTrue(v.validateMapWorkOperator(gbyOp));
VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext);
Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass());
VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0];
@@ -187,7 +187,7 @@ public class TestVectorizer {
mop.setConf(mjdesc);
Vectorizer vectorizer = new Vectorizer();
- Assert.assertTrue(vectorizer.validateOperator(mop));
+ Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
}
@@ -203,6 +203,6 @@ public class TestVectorizer {
mop.setConf(mjdesc);
Vectorizer vectorizer = new Vectorizer();
- Assert.assertTrue(vectorizer.validateOperator(mop));
+ Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
}
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q Tue Aug 5 23:05:47 2014
@@ -1,4 +1,38 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT ctimestamp1,
+ cfloat,
+ cstring1,
+ cboolean1,
+ cdouble,
+ (-26.28 + cdouble),
+ (-((-26.28 + cdouble))),
+ STDDEV_SAMP((-((-26.28 + cdouble)))),
+ (cfloat * -26.28),
+ MAX(cfloat),
+ (-(cfloat)),
+ (-(MAX(cfloat))),
+ ((-((-26.28 + cdouble))) / 10.175),
+ STDDEV_POP(cfloat),
+ COUNT(cfloat),
+ (-(((-((-26.28 + cdouble))) / 10.175))),
+ (-1.389 % STDDEV_SAMP((-((-26.28 + cdouble))))),
+ (cfloat - cdouble),
+ VAR_POP(cfloat),
+ (VAR_POP(cfloat) % 10.175),
+ VAR_SAMP(cfloat),
+ (-((cfloat - cdouble)))
+FROM alltypesorc
+WHERE (((ctinyint <= cbigint)
+ AND ((cint <= cdouble)
+ OR (ctimestamp2 < ctimestamp1)))
+ AND ((cdouble < ctinyint)
+ AND ((cbigint > -257)
+ OR (cfloat < cint))))
+GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+ORDER BY cstring1, cfloat, cdouble, ctimestamp1;
+
SELECT ctimestamp1,
cfloat,
cstring1,
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_15.q Tue Aug 5 23:05:47 2014
@@ -1,4 +1,36 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cfloat,
+ cboolean1,
+ cdouble,
+ cstring1,
+ ctinyint,
+ cint,
+ ctimestamp1,
+ STDDEV_SAMP(cfloat),
+ (-26.28 - cint),
+ MIN(cdouble),
+ (cdouble * 79.553),
+ (33 % cfloat),
+ STDDEV_SAMP(ctinyint),
+ VAR_POP(ctinyint),
+ (-23 % cdouble),
+ (-(ctinyint)),
+ VAR_SAMP(cint),
+ (cint - cfloat),
+ (-23 % ctinyint),
+ (-((-26.28 - cint))),
+ STDDEV_POP(cint)
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%ss%')
+ OR (cstring1 LIKE '10%'))
+ OR ((cint >= -75)
+ AND ((ctinyint = csmallint)
+ AND (cdouble >= -3728))))
+GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1;
+
SELECT cfloat,
cboolean1,
cdouble,
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_16.q Tue Aug 5 23:05:47 2014
@@ -1,4 +1,25 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cstring1,
+ cdouble,
+ ctimestamp1,
+ (cdouble - 9763215.5639),
+ (-((cdouble - 9763215.5639))),
+ COUNT(cdouble),
+ STDDEV_SAMP(cdouble),
+ (-(STDDEV_SAMP(cdouble))),
+ (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+ MIN(cdouble),
+ (9763215.5639 / cdouble),
+ (COUNT(cdouble) / -1.389),
+ STDDEV_SAMP(cdouble)
+FROM alltypesorc
+WHERE ((cstring2 LIKE '%b%')
+ AND ((cdouble >= -1.389)
+ OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
SELECT cstring1,
cdouble,
ctimestamp1,
Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q?rev=1616043&r1=1616042&r2=1616043&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_9.q Tue Aug 5 23:05:47 2014
@@ -1,4 +1,25 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cstring1,
+ cdouble,
+ ctimestamp1,
+ (cdouble - 9763215.5639),
+ (-((cdouble - 9763215.5639))),
+ COUNT(cdouble),
+ STDDEV_SAMP(cdouble),
+ (-(STDDEV_SAMP(cdouble))),
+ (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+ MIN(cdouble),
+ (9763215.5639 / cdouble),
+ (COUNT(cdouble) / -1.389),
+ STDDEV_SAMP(cdouble)
+FROM alltypesorc
+WHERE ((cstring2 LIKE '%b%')
+ AND ((cdouble >= -1.389)
+ OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
SELECT cfloat,
cstring1,
cint,
Added: hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_decimal_aggregate.q.out Tue Aug 5 23:05:47 2014
@@ -0,0 +1,117 @@
+PREHOOK: query: CREATE TABLE decimal_vgby STORED AS ORC AS
+ SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
+ CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+ cint
+ FROM alltypesorc
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@alltypesorc
+POSTHOOK: query: CREATE TABLE decimal_vgby STORED AS ORC AS
+ SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
+ CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2,
+ cint
+ FROM alltypesorc
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@decimal_vgby
+PREHOOK: query: EXPLAIN SELECT cint,
+ COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+ COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+ FROM decimal_vgby
+ GROUP BY cint
+ HAVING COUNT(*) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint,
+ COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+ COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+ FROM decimal_vgby
+ GROUP BY cint
+ HAVING COUNT(*) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: decimal_vgby
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
+ outputColumnNames: cint, cdecimal1, cdecimal2
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: struct<count:bigint,sum:double,variance:double>), _col15 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col15 > 1) (type: boolean)
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT cint,
+ COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+ COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+ FROM decimal_vgby
+ GROUP BY cint
+ HAVING COUNT(*) > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@decimal_vgby
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint,
+ COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
+ COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
+ FROM decimal_vgby
+ GROUP BY cint
+ HAVING COUNT(*) > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@decimal_vgby
+#### A masked pattern was here ####
+NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.715384615385 -5147.907692307693 6010604.3076923073536 1956.576923076922966667 6821.495748565159 6822.606289190924
+-3728 6 5831542.269248378 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.211097692 -4033.445769230769 6967702.8672438458471 1161283.811207307641183333 2604201.2704476737 2852759.5602156054
+-563 2 -515.621072973 -3367.6517567568 -3883.2728297298 -1941.6364148649 1426.0153418918999 2016.6902366556308 2 -617.5607769230769 -4033.445769230769 -4651.0065461538459 -2325.50327307692295 1707.9424961538462 2415.395441814127
+762 2 5831542.269248378 1531.2194054054 5833073.4886537834 2916536.7443268917 2915005.5249214866 4122440.3477364695 2 6984454.211097692 1833.9456923076925 6986288.1567899996925 3493144.07839499984625 3491310.1327026924 4937458.140118758
+6981 3 5831542.269248378 -515.621072973 5830511.027102432 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.211097692 -617.5607769230769 6983219.0895438458462 2327739.696514615282066667 3292794.4113115156 4032833.0678006653
+253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.969230769231 -11712.99230769231 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274
+528534767 1024 5831542.269248378 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.211097692 -11710.130769230771 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984
+626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.746153846154 -11712.276923076923 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603
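The golden file above exercises the full set of decimal aggregates (COUNT/MAX/MIN/SUM/AVG/STDDEV_POP/STDDEV_SAMP) at two decimal precisions. Note that in this plan only Map 1 carries "Execution mode: vectorized"; the mergepartial Group By in Reducer 2 still runs in row mode. To reproduce outside the qtest harness, a sketch built only from the statements shown in the test:

  SET hive.vectorized.execution.enabled=true;

  -- Build the ORC source table with two decimal widths derived from cdouble
  CREATE TABLE decimal_vgby STORED AS ORC AS
    SELECT cdouble,
           CAST(((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1,
           CAST(((cdouble*9.3)/13)  AS DECIMAL(23,14)) AS cdecimal2,
           cint
    FROM alltypesorc;

  -- Aggregate both decimal columns per cint, keeping only repeated keys
  SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1)
  FROM decimal_vgby
  GROUP BY cint
  HAVING COUNT(*) > 1;
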
Added: hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out?rev=1616043&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out Tue Aug 5 23:05:47 2014
@@ -0,0 +1,136 @@
+PREHOOK: query: explain
+select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+ on cd.cint = c.cint
+left outer join alltypesorc hd
+ on hd.ctinyint = c.ctinyint
+) t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+ on cd.cint = c.cint
+left outer join alltypesorc hd
+ on hd.ctinyint = c.ctinyint
+) t1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hd
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: ctinyint (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: ctinyint (type: tinyint)
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 47154 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0 {ctinyint}
+ 1
+ keys:
+ 0 cint (type: int)
+ 1 cint (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ condition expressions:
+ 0
+ 1
+ keys:
+ 0 _col0 (type: tinyint)
+ 1 ctinyint (type: tinyint)
+ Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 114112 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: cd
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: cint (type: int)
+ sort order: +
+ Map-reduce partition columns: cint (type: int)
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+ on cd.cint = c.cint
+left outer join alltypesorc hd
+ on hd.ctinyint = c.ctinyint
+) t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from (select c.ctinyint
+from alltypesorc c
+left outer join alltypesorc cd
+ on cd.cint = c.cint
+left outer join alltypesorc hd
+ on hd.ctinyint = c.ctinyint
+) t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+225951785
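The left outer join plan above runs both joins as Map Join Operators inside Map 2, fed by BROADCAST_EDGEs from Map 1 (hd) and Map 4 (cd); all three map vertices report "Execution mode: vectorized", while Reducer 3 only merges the final count. A sketch of the same plan shape on another ORC table (mytable, k1, and k2 are placeholder names, not part of this commit, and the broadcast sides must be small enough for Hive to convert the joins to map joins):

  SET hive.vectorized.execution.enabled=true;

  EXPLAIN
  SELECT COUNT(*)
  FROM mytable a
  LEFT OUTER JOIN mytable b ON b.k1 = a.k1
  LEFT OUTER JOIN mytable c ON c.k2 = a.k2;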