You are viewing a plain text version of this content; the canonical HTML version is available in the Apache mailing-list archive (the original hyperlink was removed in this plain-text conversion).
Posted to commits@hive.apache.org by gu...@apache.org on 2014/02/12 06:20:32 UTC
svn commit: r1567528 [1/3] - in /hive/trunk: itests/qtest/
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/
ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ... [subject line truncated by the archive; remaining directories are listed under "Added:" and "Modified:" below]
Author: gunther
Date: Wed Feb 12 05:20:31 2014
New Revision: 1567528
URL: http://svn.apache.org/r1567528
Log:
HIVE-6218: Stats for row-count not getting updated with Tez insert + dbclass=counter (Patch by Gunther Hagleitner, reviewed by Vikram Dixit K)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
hive/trunk/ql/src/test/results/clientpositive/tez/stats_counter.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out
Modified:
hive/trunk/itests/qtest/pom.xml
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java
hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/tez_dml.q.out
Modified: hive/trunk/itests/qtest/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/itests/qtest/pom.xml?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/itests/qtest/pom.xml (original)
+++ hive/trunk/itests/qtest/pom.xml Wed Feb 12 05:20:31 2014
@@ -39,7 +39,7 @@
<minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q</minimr.query.files>
<minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q</minimr.query.negative.files>
<minitez.query.files>tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q</minitez.query.files>
- <minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q</minitez.query.files.shared>
+ <minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q</minitez.query.files.shared>
<beeline.positive.exclude>add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rena
me.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_o
verwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q</beeline.positive.exclude>
</properties>
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java Wed Feb 12 05:20:31 2014
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.plan.Lo
import org.apache.hadoop.hive.ql.plan.StatsWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.stats.CounterStatsAggregator;
+import org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.stats.StatsPublisher;
@@ -154,7 +155,8 @@ public class StatsTask extends Task<Stat
int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf);
// "counter" type does not need to collect stats per task
- boolean counterStat = statsAggregator instanceof CounterStatsAggregator;
+ boolean counterStat = statsAggregator instanceof CounterStatsAggregator
+ || statsAggregator instanceof CounterStatsAggregatorTez;
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java Wed Feb 12 05:20:31 2014
@@ -49,6 +49,8 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.tez.client.TezSession;
+import org.apache.tez.common.counters.CounterGroup;
+import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.common.counters.TezCounters;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.Edge;
@@ -154,6 +156,14 @@ public class TezTask extends Task<TezWor
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
counters = client.getDAGStatus(statusGetOpts).getDAGCounters();
+ if (LOG.isInfoEnabled()) {
+ for (CounterGroup group: counters) {
+ LOG.info(group.getDisplayName() +":");
+ for (TezCounter counter: group) {
+ LOG.info(" "+counter.getDisplayName()+": "+counter.getValue());
+ }
+ }
+ }
} catch (Exception e) {
LOG.error("Failed to execute tez graph.", e);
// rc will be 1 at this point indicating failure.
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java Wed Feb 12 05:20:31 2014
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.Sta
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper;
@@ -59,6 +60,7 @@ public class PartialScanMapper extends M
private long uncompressedFileSize = 0;
private long rowNo = 0;
private boolean exception = false;
+ private Reporter rp = null;
public final static Log LOG = LogFactory.getLog("PartialScanMapper");
@@ -68,6 +70,7 @@ public class PartialScanMapper extends M
@Override
public void configure(JobConf job) {
jc = job;
+ MapredContext.init(true, new JobConf(jc));
statsAggKeyPrefix = HiveConf.getVar(job,
HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX);
}
@@ -77,6 +80,12 @@ public class PartialScanMapper extends M
public void map(Object k, RCFileValueBufferWrapper value,
OutputCollector<Object, Object> output, Reporter reporter)
throws IOException {
+
+ if (rp == null) {
+ this.rp = reporter;
+ MapredContext.get().setReporter(reporter);
+ }
+
try {
//CombineHiveInputFormat is set in PartialScanTask.
RCFileKeyBufferWrapper key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey();
@@ -114,6 +123,8 @@ public class PartialScanMapper extends M
} catch (HiveException e) {
this.exception = true;
throw new RuntimeException(e);
+ } finally {
+ MapredContext.close();
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Wed Feb 12 05:20:31 2014
@@ -24,6 +24,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.lang.StringBuffer;
import java.util.Stack;
import org.apache.hadoop.fs.Path;
@@ -124,23 +125,10 @@ public class GenMRTableScan1 implements
if (currWork.getReduceWork() != null) {
currWork.getReduceWork().setGatheringStats(true);
}
+
// NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
// and pass it to setTaskPlan as the last parameter
- Set<Partition> confirmedPartns = new HashSet<Partition>();
- tableSpec tblSpec = parseInfo.getTableSpec();
- if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
- // static partition
- if (tblSpec.partHandle != null) {
- confirmedPartns.add(tblSpec.partHandle);
- } else {
- // partial partition spec has null partHandle
- assert parseInfo.isNoScanAnalyzeCommand();
- confirmedPartns.addAll(tblSpec.partitions);
- }
- } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
- // dynamic partition
- confirmedPartns.addAll(tblSpec.partitions);
- }
+ Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
if (confirmedPartns.size() > 0) {
Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, false);
@@ -174,24 +162,9 @@ public class GenMRTableScan1 implements
Task<? extends Serializable> currTask, QBParseInfo parseInfo, StatsWork statsWork,
Task<StatsWork> statsTask) throws SemanticException {
String aggregationKey = op.getConf().getStatsAggPrefix();
- List<Path> inputPaths = new ArrayList<Path>();
- switch (parseInfo.getTableSpec().specType) {
- case TABLE_ONLY:
- inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath());
- break;
- case STATIC_PARTITION:
- Partition part = parseInfo.getTableSpec().partHandle;
- try {
- aggregationKey += Warehouse.makePartPath(part.getSpec());
- } catch (MetaException e) {
- throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(
- part.getDataLocation().toString() + e.getMessage()));
- }
- inputPaths.add(part.getDataLocation());
- break;
- default:
- assert false;
- }
+ StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
+ List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer);
+ aggregationKey = aggregationKeyBuffer.toString();
// scan work
PartialScanWork scanWork = new PartialScanWork(inputPaths);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Wed Feb 12 05:20:31 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimi
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
@@ -28,12 +29,14 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
+import java.lang.StringBuffer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
@@ -60,14 +63,18 @@ import org.apache.hadoop.hive.ql.io.RCFi
import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.QBParseInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
@@ -1704,6 +1711,48 @@ public final class GenMapRedUtils {
return dest;
}
+ public static Set<Partition> getConfirmedPartitionsForScan(QBParseInfo parseInfo) {
+ Set<Partition> confirmedPartns = new HashSet<Partition>();
+ tableSpec tblSpec = parseInfo.getTableSpec();
+ if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
+ // static partition
+ if (tblSpec.partHandle != null) {
+ confirmedPartns.add(tblSpec.partHandle);
+ } else {
+ // partial partition spec has null partHandle
+ assert parseInfo.isNoScanAnalyzeCommand();
+ confirmedPartns.addAll(tblSpec.partitions);
+ }
+ } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
+ // dynamic partition
+ confirmedPartns.addAll(tblSpec.partitions);
+ }
+ return confirmedPartns;
+ }
+
+ public static List<Path> getInputPathsForPartialScan(QBParseInfo parseInfo, StringBuffer aggregationKey)
+ throws SemanticException {
+ List<Path> inputPaths = new ArrayList<Path>();
+ switch (parseInfo.getTableSpec().specType) {
+ case TABLE_ONLY:
+ inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath());
+ break;
+ case STATIC_PARTITION:
+ Partition part = parseInfo.getTableSpec().partHandle;
+ try {
+ aggregationKey.append(Warehouse.makePartPath(part.getSpec()));
+ } catch (MetaException e) {
+ throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(
+ part.getDataLocation().toString() + e.getMessage()));
+ }
+ inputPaths.add(part.getDataLocation());
+ break;
+ default:
+ assert false;
+ }
+ return inputPaths;
+ }
+
private GenMapRedUtils() {
// prevent instantiation
}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java?rev=1567528&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java Wed Feb 12 05:20:31 2014
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType;
+
+/**
+ * GenTezUtils is a collection of shared helper methods to produce
+ * TezWork
+ */
+public class GenTezUtils {
+
+ static final private Log LOG = LogFactory.getLog(GenTezUtils.class.getName());
+
+ // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...)
+ private int sequenceNumber = 0;
+
+ // singleton
+ private static GenTezUtils utils;
+
+ public static GenTezUtils getUtils() {
+ if (utils == null) {
+ utils = new GenTezUtils();
+ }
+ return utils;
+ }
+
+ protected GenTezUtils() {
+ }
+
+ public void resetSequenceNumber() {
+ sequenceNumber = 0;
+ }
+
+ public ReduceWork createReduceWork(GenTezProcContext context, Operator<?> root, TezWork tezWork) {
+ assert !root.getParentOperators().isEmpty();
+ ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber));
+ LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
+ reduceWork.setReducer(root);
+ reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
+
+ // All parents should be reduce sinks. We pick the one we just walked
+ // to choose the number of reducers. In the join/union case they will
+ // all be -1. In sort/order case where it matters there will be only
+ // one parent.
+ assert context.parentOfRoot instanceof ReduceSinkOperator;
+ ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
+
+ reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
+
+ setupReduceSink(context, reduceWork, reduceSink);
+
+ tezWork.add(reduceWork);
+ tezWork.connect(
+ context.preceedingWork,
+ reduceWork, EdgeType.SIMPLE_EDGE);
+
+ return reduceWork;
+ }
+
+ protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork,
+ ReduceSinkOperator reduceSink) {
+
+ LOG.debug("Setting up reduce sink: " + reduceSink
+ + " with following reduce work: " + reduceWork.getName());
+
+ // need to fill in information about the key and value in the reducer
+ GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink);
+
+ // remember which parent belongs to which tag
+ reduceWork.getTagToInput().put(reduceSink.getConf().getTag(),
+ context.preceedingWork.getName());
+
+ // remember the output name of the reduce sink
+ reduceSink.getConf().setOutputName(reduceWork.getName());
+ }
+
+ public MapWork createMapWork(GenTezProcContext context, Operator<?> root,
+ TezWork tezWork, PrunedPartitionList partitions) throws SemanticException {
+ assert root.getParentOperators().isEmpty();
+ MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
+ LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
+
+ // map work starts with table scan operators
+ assert root instanceof TableScanOperator;
+ String alias = ((TableScanOperator)root).getConf().getAlias();
+
+ setupMapWork(mapWork, context, partitions, root, alias);
+
+ // add new item to the tez work
+ tezWork.add(mapWork);
+
+ return mapWork;
+ }
+
+ // this method's main use is to help unit testing this class
+ protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
+ PrunedPartitionList partitions, Operator<? extends OperatorDesc> root,
+ String alias) throws SemanticException {
+ // All the setup is done in GenMapRedUtils
+ GenMapRedUtils.setMapWork(mapWork, context.parseContext,
+ context.inputs, partitions, root, alias, context.conf, false);
+ }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Wed Feb 12 05:20:31 2014
@@ -49,8 +49,15 @@ public class GenTezWork implements NodeP
static final private Log LOG = LogFactory.getLog(GenTezWork.class.getName());
- // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...)
- private int sequenceNumber = 0;
+ // instance of shared utils
+ private GenTezUtils utils = null;
+
+ /**
+ * Constructor takes utils as parameter to facilitate testing
+ */
+ public GenTezWork(GenTezUtils utils) {
+ this.utils = utils;
+ }
@Override
public Object process(Node nd, Stack<Node> stack,
@@ -92,9 +99,9 @@ public class GenTezWork implements NodeP
} else {
// create a new vertex
if (context.preceedingWork == null) {
- work = createMapWork(context, root, tezWork);
+ work = utils.createMapWork(context, root, tezWork, null);
} else {
- work = createReduceWork(context, root, tezWork);
+ work = utils.createReduceWork(context, root, tezWork);
}
context.rootToWorkMap.put(root, work);
}
@@ -186,74 +193,4 @@ public class GenTezWork implements NodeP
return null;
}
-
- protected ReduceWork createReduceWork(GenTezProcContext context, Operator<?> root,
- TezWork tezWork) {
- assert !root.getParentOperators().isEmpty();
- ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber));
- LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
- reduceWork.setReducer(root);
- reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
-
- // All parents should be reduce sinks. We pick the one we just walked
- // to choose the number of reducers. In the join/union case they will
- // all be -1. In sort/order case where it matters there will be only
- // one parent.
- assert context.parentOfRoot instanceof ReduceSinkOperator;
- ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
-
- reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
-
- setupReduceSink(context, reduceWork, reduceSink);
-
- tezWork.add(reduceWork);
- tezWork.connect(
- context.preceedingWork,
- reduceWork, EdgeType.SIMPLE_EDGE);
-
- return reduceWork;
- }
-
- protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork,
- ReduceSinkOperator reduceSink) {
-
- LOG.debug("Setting up reduce sink: " + reduceSink
- + " with following reduce work: " + reduceWork.getName());
-
- // need to fill in information about the key and value in the reducer
- GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink);
-
- // remember which parent belongs to which tag
- reduceWork.getTagToInput().put(reduceSink.getConf().getTag(),
- context.preceedingWork.getName());
-
- // remember the output name of the reduce sink
- reduceSink.getConf().setOutputName(reduceWork.getName());
- }
-
- protected MapWork createMapWork(GenTezProcContext context, Operator<?> root,
- TezWork tezWork) throws SemanticException {
- assert root.getParentOperators().isEmpty();
- MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
- LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
-
- // map work starts with table scan operators
- assert root instanceof TableScanOperator;
- String alias = ((TableScanOperator)root).getConf().getAlias();
-
- setupMapWork(mapWork, context, root, alias);
-
- // add new item to the tez work
- tezWork.add(mapWork);
-
- return mapWork;
- }
-
- // this method's main use is to help unit testing this class
- protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
- Operator<? extends OperatorDesc> root, String alias) throws SemanticException {
- // All the setup is done in GenMapRedUtils
- GenMapRedUtils.setMapWork(mapWork, context.parseContext,
- context.inputs, null, root, alias, context.conf, false);
- }
}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java?rev=1567528&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java Wed Feb 12 05:20:31 2014
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.lang.StringBuffer;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.GenTezWork;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.QBParseInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
+
+/**
+ * ProcessAnalyzeTable sets up work for the several variants of analyze table
+ * (normal, no scan, partial scan.) The plan at this point will be a single
+ * table scan operator.
+ */
+public class ProcessAnalyzeTable implements NodeProcessor {
+
+ static final private Log LOG = LogFactory.getLog(ProcessAnalyzeTable.class.getName());
+
+ // shared plan utils for tez
+ private GenTezUtils utils = null;
+
+ /**
+ * Injecting the utils in the constructor facilitates testing
+ */
+ public ProcessAnalyzeTable(GenTezUtils utils) {
+ this.utils = utils;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Object process(Node nd, Stack<Node> stack,
+ NodeProcessorCtx procContext, Object... nodeOutputs)
+ throws SemanticException {
+
+ GenTezProcContext context = (GenTezProcContext) procContext;
+
+ TableScanOperator tableScan = (TableScanOperator) nd;
+
+ ParseContext parseContext = context.parseContext;
+ QB queryBlock = parseContext.getQB();
+ QBParseInfo parseInfo = parseContext.getQB().getParseInfo();
+
+ if (parseInfo.isAnalyzeCommand()) {
+
+ assert tableScan.getChildOperators() == null
+ || tableScan.getChildOperators().size() == 0;
+
+ String alias = null;
+ for (String a: parseContext.getTopOps().keySet()) {
+ if (tableScan == parseContext.getTopOps().get(a)) {
+ alias = a;
+ }
+ }
+
+ assert alias != null;
+
+ TezWork tezWork = context.currentTask.getWork();
+
+ // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
+ // The plan consists of a simple TezTask followed by a StatsTask.
+ // The Tez task is just a simple TableScanOperator
+
+ StatsWork statsWork = new StatsWork(parseInfo.getTableSpec());
+ statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+ statsWork.setSourceTask(context.currentTask);
+ statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+ Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
+ context.currentTask.addDependentTask(statsTask);
+
+ // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+ // The plan consists of a StatsTask only.
+ if (parseInfo.isNoScanAnalyzeCommand()) {
+ statsTask.setParentTasks(null);
+ statsWork.setNoScanAnalyzeCommand(true);
+ context.rootTasks.remove(context.currentTask);
+ context.rootTasks.add(statsTask);
+ }
+
+ // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
+ if (parseInfo.isPartialScanAnalyzeCommand()) {
+ handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask);
+ }
+
+ // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
+ // and pass it to setTaskPlan as the last parameter
+ Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+ PrunedPartitionList partitions = null;
+ if (confirmedPartns.size() > 0) {
+ Table source = queryBlock.getMetaData().getTableForAlias(alias);
+ partitions = new PrunedPartitionList(source, confirmedPartns, false);
+ }
+
+ MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
+ w.setGatheringStats(true);
+
+ return true;
+ }
+
+ return null;
+ }
+
+ /**
+ * handle partial scan command.
+ *
+ * It is composed of PartialScanTask followed by StatsTask.
+ */
+ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext,
+ QBParseInfo parseInfo, StatsWork statsWork, GenTezProcContext context,
+ Task<StatsWork> statsTask) throws SemanticException {
+
+ String aggregationKey = tableScan.getConf().getStatsAggPrefix();
+ StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
+ List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer);
+ aggregationKey = aggregationKeyBuffer.toString();
+
+ // scan work
+ PartialScanWork scanWork = new PartialScanWork(inputPaths);
+ scanWork.setMapperCannotSpanPartns(true);
+ scanWork.setAggKey(aggregationKey);
+
+ // stats work
+ statsWork.setPartialScanAnalyzeCommand(true);
+
+ // partial scan task
+ DriverContext driverCxt = new DriverContext();
+ Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
+ partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
+ partialScanTask.setWork(scanWork);
+ statsWork.setSourceTask(partialScanTask);
+
+ // task dependency
+ context.rootTasks.remove(context.currentTask);
+ context.rootTasks.add(partialScanTask);
+ partialScanTask.addDependentTask(statsTask);
+ }
+}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java Wed Feb 12 05:20:31 2014
@@ -35,8 +35,7 @@ public class TaskCompilerFactory {
* into executable units.
*/
public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) {
- if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
- && !parseContext.getQB().getParseInfo().isAnalyzeCommand()) {
+ if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
return new TezCompiler();
} else {
return new MapReduceCompiler();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Wed Feb 12 05:20:31 2014
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Jo
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -109,8 +110,10 @@ public class TezCompiler extends TaskCom
List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
throws SemanticException {
+ GenTezUtils.getUtils().resetSequenceNumber();
+
ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
- GenTezWork genTezWork = new GenTezWork();
+ GenTezWork genTezWork = new GenTezWork(GenTezUtils.getUtils());
GenTezProcContext procCtx = new GenTezProcContext(
conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
@@ -131,6 +134,10 @@ public class TezCompiler extends TaskCom
FileSinkOperator.getOperatorName() + "%"),
new CompositeProcessor(new FileSinkProcessor(), genTezWork));
+ opRules.put(new RuleRegExp("Handle Potential Analyze Command",
+ TableScanOperator.getOperatorName() + "%"),
+ new ProcessAnalyzeTable(GenTezUtils.getUtils()));
+
opRules.put(new RuleRegExp("Bail on Union",
UnionOperator.getOperatorName() + "%"), new NodeProcessor()
{
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java Wed Feb 12 05:20:31 2014
@@ -57,17 +57,21 @@ public class CounterStatsAggregatorTez i
@Override
public String aggregateStats(String keyPrefix, String statType) {
- if (delegate) {
- return mrAggregator.aggregateStats(keyPrefix, statType);
- }
+ String result;
- long value = 0;
- for (String groupName : counters.getGroupNames()) {
- if (groupName.startsWith(keyPrefix)) {
- value += counters.getGroup(groupName).findCounter(statType).getValue();
+ if (delegate) {
+ result = mrAggregator.aggregateStats(keyPrefix, statType);
+ } else {
+ long value = 0;
+ for (String groupName : counters.getGroupNames()) {
+ if (groupName.startsWith(keyPrefix)) {
+ value += counters.getGroup(groupName).findCounter(statType).getValue();
+ }
}
+ result = String.valueOf(value);
}
- return String.valueOf(value);
+ LOG.info("Counter based stats for ("+keyPrefix+") are: "+result);
+ return result;
}
@Override
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java Wed Feb 12 05:20:31 2014
@@ -76,17 +76,19 @@ public class TestGenTezWork {
(Set<ReadEntity>)Collections.EMPTY_SET,
(Set<WriteEntity>)Collections.EMPTY_SET);
- proc = new GenTezWork() {
+ proc = new GenTezWork(new GenTezUtils() {
@Override
- protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
- Operator<? extends OperatorDesc> root, String alias) throws SemanticException {
+ protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
+ PrunedPartitionList partitions, Operator<? extends OperatorDesc> root, String alias)
+ throws SemanticException {
+
LinkedHashMap<String, Operator<? extends OperatorDesc>> map
= new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
map.put("foo", root);
mapWork.setAliasToWork(map);
return;
}
- };
+ });
fs = new FileSinkOperator();
fs.setConf(new FileSinkDesc());
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out Wed Feb 12 05:20:31 2014
@@ -201,26 +201,26 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Extract
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out Wed Feb 12 05:20:31 2014
@@ -226,26 +226,26 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
Reducer 2
Reduce Operator Tree:
Extract
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out Wed Feb 12 05:20:31 2014
@@ -202,17 +202,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out Wed Feb 12 05:20:31 2014
@@ -374,8 +374,8 @@ Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 10
+ rawDataSize 96
totalSize 106
#### A masked pattern was here ####
@@ -540,8 +540,8 @@ Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 10
+ rawDataSize 120
totalSize 199
#### A masked pattern was here ####
@@ -603,8 +603,8 @@ Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 10
+ rawDataSize 120
totalSize 199
#### A masked pattern was here ####
@@ -791,8 +791,8 @@ Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
- numRows 0
- rawDataSize 0
+ numRows 10
+ rawDataSize 96
totalSize 106
#### A masked pattern was here ####
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out Wed Feb 12 05:20:31 2014
@@ -201,26 +201,26 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string)
Reducer 2
Reduce Operator Tree:
Extract
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out Wed Feb 12 05:20:31 2014
@@ -144,17 +144,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: f
- Statistics: Num rows: 59 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: key is not null (type: boolean)
- Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: key (type: int)
Path -> Alias:
@@ -174,9 +174,9 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.filter_join_breaktask
numFiles 1
- numRows 0
+ numRows 25
partition_columns ds
- rawDataSize 0
+ rawDataSize 211
serialization.ddl struct filter_join_breaktask { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -206,17 +206,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: g
- Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (value <> '') (type: boolean)
- Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: value (type: string)
sort order: +
Map-reduce partition columns: value (type: string)
- Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
tag: 1
value expressions: value (type: string)
Path -> Alias:
@@ -236,9 +236,9 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.filter_join_breaktask
numFiles 1
- numRows 0
+ numRows 25
partition_columns ds
- rawDataSize 0
+ rawDataSize 211
serialization.ddl struct filter_join_breaktask { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -268,17 +268,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: m
- Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean)
- Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE
tag: 1
value expressions: value (type: string)
Path -> Alias:
@@ -298,9 +298,9 @@ STAGE PLANS:
#### A masked pattern was here ####
name default.filter_join_breaktask
numFiles 1
- numRows 0
+ numRows 25
partition_columns ds
- rawDataSize 0
+ rawDataSize 211
serialization.ddl struct filter_join_breaktask { i32 key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -336,12 +336,12 @@ STAGE PLANS:
0 {VALUE._col0}
1 {VALUE._col1}
outputColumnNames: _col0, _col6
- Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col6 (type: string)
sort order: +
Map-reduce partition columns: _col6 (type: string)
- Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE
tag: 0
value expressions: _col0 (type: int)
Reducer 3
@@ -354,17 +354,17 @@ STAGE PLANS:
0 {VALUE._col0}
1 {VALUE._col1}
outputColumnNames: _col0, _col11
- Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col11 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.TextInputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out Wed Feb 12 05:20:31 2014
@@ -541,14 +541,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_src
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -665,14 +665,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_src
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out Wed Feb 12 05:20:31 2014
@@ -541,14 +541,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_src
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -665,14 +665,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test_src
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out Wed Feb 12 05:20:31 2014
@@ -295,11 +295,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: stats_tbl
- Statistics: Num rows: 4731 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
outputColumnNames: s, bo, bin, si, i, b
- Statistics: Num rows: 4731 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
mode: hash
@@ -396,11 +396,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: stats_tbl_part
- Statistics: Num rows: 4491 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
outputColumnNames: s, bo, bin, si, i, b
- Statistics: Num rows: 4491 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
mode: hash
@@ -698,63 +698,18 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
STAGE DEPENDENCIES:
- Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: stats_tbl
- Statistics: Num rows: 4486 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double)
- outputColumnNames: s, bo, bin, si, i, b, f, d
- Statistics: Num rows: 4486 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), max(VALUE._col10), min(VALUE._col11)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
PREHOOK: type: QUERY
-PREHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@stats_tbl
#### A masked pattern was here ####
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -800,7 +755,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9999 9999 1999.8000000003176 9999 9999 9999 9999 9999 65791 4294967296 99.98 0.01
+9999 9999 1999.8 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0
PREHOOK: query: explain
select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
PREHOOK: type: QUERY
@@ -852,69 +807,18 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
STAGE DEPENDENCIES:
- Stage-1 is a root stage
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: stats_tbl_part
- Statistics: Num rows: 4258 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double)
- outputColumnNames: s, bo, bin, si, i, b, f, d
- Statistics: Num rows: 4258 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
- Reducer 2
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), max(VALUE._col10), min(VALUE._col11)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
PREHOOK: type: QUERY
-PREHOOK: Input: default@stats_tbl_part
-PREHOOK: Input: default@stats_tbl_part@dt=2010
-PREHOOK: Input: default@stats_tbl_part@dt=2011
-PREHOOK: Input: default@stats_tbl_part@dt=2012
#### A masked pattern was here ####
POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@stats_tbl_part
-POSTHOOK: Input: default@stats_tbl_part@dt=2010
-POSTHOOK: Input: default@stats_tbl_part@dt=2011
-POSTHOOK: Input: default@stats_tbl_part@dt=2012
#### A masked pattern was here ####
POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -960,7 +864,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9489 9489 1897.8000000002944 9489 9489 9489 9489 9489 65791 4294967296 99.98 0.01
+9489 9489 1897.8 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0
PREHOOK: query: explain select count(ts) from stats_tbl_part
PREHOOK: type: QUERY
POSTHOOK: query: explain select count(ts) from stats_tbl_part
@@ -1023,11 +927,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: stats_tbl_part
- Statistics: Num rows: 24705 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ts (type: timestamp)
outputColumnNames: ts
- Statistics: Num rows: 24705 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(ts)
mode: hash
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out Wed Feb 12 05:20:31 2014
@@ -766,6 +766,7 @@ sum(p_retailprice) as s
from part
group by p_mfgr, p_brand
PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@part
POSTHOOK: query: -- 16. testViewAsTableInputToPTF
create view IF NOT EXISTS mfgr_price_view as
select p_mfgr, p_brand,
@@ -773,6 +774,7 @@ sum(p_retailprice) as s
from part
group by p_mfgr, p_brand
POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@part
POSTHOOK: Output: default@mfgr_price_view
PREHOOK: query: select p_mfgr, p_brand, s,
sum(s) over w1 as s1