Posted to commits@hive.apache.org by gu...@apache.org on 2014/02/12 06:20:32 UTC

svn commit: r1567528 [1/3] - in /hive/trunk: itests/qtest/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/ ql/src/java/org/apache/hadoop/hive/ql/optimize...

Author: gunther
Date: Wed Feb 12 05:20:31 2014
New Revision: 1567528

URL: http://svn.apache.org/r1567528
Log:
HIVE-6218: Stats for row-count not getting updated with Tez insert + dbclass=counter (Patch by Gunther Hagleitner, reviewed by Vikram Dixit K)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
    hive/trunk/ql/src/test/results/clientpositive/tez/stats_counter.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/stats_counter_partitioned.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/stats_noscan_1.q.out
Modified:
    hive/trunk/itests/qtest/pom.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/subquery_exists.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/tez_dml.q.out

Modified: hive/trunk/itests/qtest/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/itests/qtest/pom.xml?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/itests/qtest/pom.xml (original)
+++ hive/trunk/itests/qtest/pom.xml Wed Feb 12 05:20:31 2014
@@ -39,7 +39,7 @@
     <minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q</minimr.query.files>
     <minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q</minimr.query.negative.files>
     <minitez.query.files>tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q</minitez.query.files>
-    <minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q</minitez.query.files.shared>
+    <minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q,stats_counter.q,stats_noscan_1.q,stats_counter_partitioned.q</minitez.query.files.shared>
     <beeline.positive.exclude>add_part_exist.q,alter1.q,alter2.q,alter4.q,alter5.q,alter_rename_partition.q,alter_rename_partition_authorization.q,archive.q,archive_corrupt.q,archive_multi.q,archive_mr_1806.q,archive_multi_mr_1806.q,authorization_1.q,authorization_2.q,authorization_4.q,authorization_5.q,authorization_6.q,authorization_7.q,ba_table1.q,ba_table2.q,ba_table3.q,ba_table_udfs.q,binary_table_bincolserde.q,binary_table_colserde.q,cluster.q,columnarserde_create_shortcut.q,combine2.q,constant_prop.q,create_nested_type.q,create_or_replace_view.q,create_struct_table.q,create_union_table.q,database.q,database_location.q,database_properties.q,ddltime.q,describe_database_json.q,drop_database_removes_partition_dirs.q,escape1.q,escape2.q,exim_00_nonpart_empty.q,exim_01_nonpart.q,exim_02_00_part_empty.q,exim_02_part.q,exim_03_nonpart_over_compat.q,exim_04_all_part.q,exim_04_evolved_parts.q,exim_05_some_part.q,exim_06_one_part.q,exim_07_all_part_over_nonoverlap.q,exim_08_nonpart_rename.q,exim_09_part_spec_nonoverlap.q,exim_10_external_managed.q,exim_11_managed_external.q,exim_12_external_location.q,exim_13_managed_location.q,exim_14_managed_location_over_existing.q,exim_15_external_part.q,exim_16_part_external.q,exim_17_part_managed.q,exim_18_part_external.q,exim_19_00_part_external_location.q,exim_19_part_external_location.q,exim_20_part_managed_location.q,exim_21_export_authsuccess.q,exim_22_import_exist_authsuccess.q,exim_23_import_part_authsuccess.q,exim_24_import_nonexist_authsuccess.q,global_limit.q,groupby_complex_types.q,groupby_complex_types_multi_single_reducer.q,index_auth.q,index_auto.q,index_auto_empty.q,index_bitmap.q,index_bitmap1.q,index_bitmap2.q,index_bitmap3.q,index_bitmap_auto.q,index_bitmap_rc.q,index_compact.q,index_compact_1.q,index_compact_2.q,index_compact_3.q,index_stale_partitioned.q,init_file.q,input16.q,input16_cc.q,input46.q,input_columnarserde.q,input_dynamicserde.q,input_lazyserde.q,input_testxpath3.q,input_testxpath4.q,insert2_overwrite_partitions.q,insertexternal1.q,join_thrift.q,lateral_view.q,load_binary_data.q,load_exist_part_authsuccess.q,load_nonpart_authsuccess.q,load_part_authsuccess.q,loadpart_err.q,lock1.q,lock2.q,lock3.q,lock4.q,merge_dynamic_partition.q,multi_insert.q,multi_insert_move_tasks_share_dependencies.q,null_column.q,ppd_clusterby.q,query_with_semi.q,rename_column.q,sample6.q,sample_islocalmode_hook.q,set_processor_namespaces.q,show_tables.q,source.q,split_sample.q,str_to_map.q,transform1.q,udaf_collect_set.q,udaf_context_ngrams.q,udaf_histogram_numeric.q,udaf_ngrams.q,udaf_percentile_approx.q,udf_array.q,udf_bitmap_and.q,udf_bitmap_or.q,udf_explode.q,udf_format_number.q,udf_map.q,udf_map_keys.q,udf_map_values.q,udf_max.q,udf_min.q,udf_named_struct.q,udf_percentile.q,udf_printf.q,udf_sentences.q,udf_sort_array.q,udf_split.q,udf_struct.q,udf_substr.q,udf_translate.q,udf_union.q,udf_xpath.q,udtf_stack.q,view.q,virtual_column.q</beeline.positive.exclude>
   </properties>
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java Wed Feb 12 05:20:31 2014
@@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.plan.Lo
 import org.apache.hadoop.hive.ql.plan.StatsWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.stats.CounterStatsAggregator;
+import org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez;
 import org.apache.hadoop.hive.ql.stats.StatsAggregator;
 import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.stats.StatsPublisher;
@@ -154,7 +155,8 @@ public class StatsTask extends Task<Stat
       int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf);
 
       // "counter" type does not need to collect stats per task
-      boolean counterStat = statsAggregator instanceof CounterStatsAggregator;
+      boolean counterStat = statsAggregator instanceof CounterStatsAggregator 
+        || statsAggregator instanceof CounterStatsAggregatorTez;
       if (partitions == null) {
         org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
         Map<String, String> parameters = tTable.getParameters();
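Context for the hunk above: aggregators that read totals from job counters produce one aggregate per job, so StatsTask can skip collecting and merging per-task intermediate stats; the patch extends that check to cover the Tez flavor. A minimal self-contained Java sketch of the dispatch (hypothetical stand-in classes, not Hive's actual ones):

    // Sketch only: hypothetical stand-ins for Hive's aggregator classes.
    interface StatsAggregator {}
    class CounterStatsAggregator implements StatsAggregator {}
    class CounterStatsAggregatorTez implements StatsAggregator {}
    class JDBCStatsAggregator implements StatsAggregator {}

    public class StatsDispatchSketch {
      // Counter-based aggregators get totals from job counters, so no
      // per-task intermediate stats need to be published and merged.
      static boolean isCounterBased(StatsAggregator agg) {
        return agg instanceof CounterStatsAggregator
            || agg instanceof CounterStatsAggregatorTez;
      }

      public static void main(String[] args) {
        System.out.println(isCounterBased(new CounterStatsAggregatorTez())); // true
        System.out.println(isCounterBased(new JDBCStatsAggregator()));       // false
      }
    }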

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java Wed Feb 12 05:20:31 2014
@@ -49,6 +49,8 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.tez.client.TezSession;
+import org.apache.tez.common.counters.CounterGroup;
+import org.apache.tez.common.counters.TezCounter;
 import org.apache.tez.common.counters.TezCounters;
 import org.apache.tez.dag.api.DAG;
 import org.apache.tez.dag.api.Edge;
@@ -154,6 +156,14 @@ public class TezTask extends Task<TezWor
       Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
       counters = client.getDAGStatus(statusGetOpts).getDAGCounters();
 
+      if (LOG.isInfoEnabled()) {
+        for (CounterGroup group: counters) {
+          LOG.info(group.getDisplayName() +":");
+          for (TezCounter counter: group) {
+            LOG.info("   "+counter.getDisplayName()+": "+counter.getValue());
+          }
+        }
+      }
     } catch (Exception e) {
       LOG.error("Failed to execute tez graph.", e);
       // rc will be 1 at this point indicating failure.
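The loop added above dumps every DAG counter group at INFO level once the DAG finishes, which is what counter-based stats aggregation later reads. A stand-alone sketch of the same nested iteration, using plain maps in place of TezCounters (the real CounterGroup/TezCounter objects are iterated the same way; group and counter names below are illustrative):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class CounterDumpSketch {
      public static void main(String[] args) {
        // Stand-in for TezCounters: group name -> (counter name -> value).
        Map<String, Map<String, Long>> counters = new LinkedHashMap<>();
        Map<String, Long> hiveGroup = new LinkedHashMap<>();
        hiveGroup.put("CREATED_FILES", 1L);
        hiveGroup.put("RECORDS_OUT", 500L);
        counters.put("HIVE", hiveGroup);

        // Same shape as the added logging: one line per group,
        // one indented line per counter.
        for (Map.Entry<String, Map<String, Long>> group : counters.entrySet()) {
          System.out.println(group.getKey() + ":");
          for (Map.Entry<String, Long> counter : group.getValue().entrySet()) {
            System.out.println("   " + counter.getKey() + ": " + counter.getValue());
          }
        }
      }
    }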

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/stats/PartialScanMapper.java Wed Feb 12 05:20:31 2014
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.Sta
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper;
@@ -59,6 +60,7 @@ public class PartialScanMapper extends M
   private long uncompressedFileSize = 0;
   private long rowNo = 0;
   private boolean exception = false;
+  private Reporter rp = null;
 
   public final static Log LOG = LogFactory.getLog("PartialScanMapper");
 
@@ -68,6 +70,7 @@ public class PartialScanMapper extends M
   @Override
   public void configure(JobConf job) {
     jc = job;
+    MapredContext.init(true, new JobConf(jc));
     statsAggKeyPrefix = HiveConf.getVar(job,
         HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX);
   }
@@ -77,6 +80,12 @@ public class PartialScanMapper extends M
   public void map(Object k, RCFileValueBufferWrapper value,
       OutputCollector<Object, Object> output, Reporter reporter)
       throws IOException {
+
+    if (rp == null) {
+      this.rp = reporter;
+      MapredContext.get().setReporter(reporter);
+    }
+
     try {
       //CombineHiveInputFormat is set in PartialScanTask.
       RCFileKeyBufferWrapper key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey();
@@ -114,6 +123,8 @@ public class PartialScanMapper extends M
     } catch (HiveException e) {
       this.exception = true;
       throw new RuntimeException(e);
+    } finally {
+      MapredContext.close();
     }
   }
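The lifecycle pattern in this hunk: the MapredContext is created in configure(), where no Reporter exists yet, and the Reporter is attached lazily on the first map() call, the earliest point one is available. A compile-ready sketch of that lifecycle, with hypothetical stubs in place of Hadoop's Reporter and Hive's MapredContext:

    // Hypothetical stubs, not the Hadoop/Hive classes.
    class ReporterStub {}

    class MapredContextStub {
      private static MapredContextStub current;
      private ReporterStub reporter;

      static void init() { current = new MapredContextStub(); }
      static MapredContextStub get() { return current; }
      void setReporter(ReporterStub rp) { this.reporter = rp; }
      static void close() { current = null; }
    }

    public class PartialScanMapperSketch {
      private ReporterStub rp = null;

      // configure() runs once per task, before any Reporter exists.
      public void configure() {
        MapredContextStub.init();
      }

      // map() is handed the Reporter, so it is attached on the first record.
      public void map(Object key, Object value, ReporterStub reporter) {
        if (rp == null) {
          rp = reporter;
          MapredContextStub.get().setReporter(reporter);
        }
        try {
          // ... scan the record and accumulate row/size stats ...
        } finally {
          MapredContextStub.close(); // mirrors the finally block in the patch
        }
      }

      public static void main(String[] args) {
        PartialScanMapperSketch m = new PartialScanMapperSketch();
        m.configure();
        m.map("k", "v", new ReporterStub());
      }
    }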
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Wed Feb 12 05:20:31 2014
@@ -24,6 +24,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.lang.StringBuffer;
 import java.util.Stack;
 
 import org.apache.hadoop.fs.Path;
@@ -124,23 +125,10 @@ public class GenMRTableScan1 implements 
           if (currWork.getReduceWork() != null) {
             currWork.getReduceWork().setGatheringStats(true);
           }
+
           // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
           // and pass it to setTaskPlan as the last parameter
-          Set<Partition> confirmedPartns = new HashSet<Partition>();
-          tableSpec tblSpec = parseInfo.getTableSpec();
-          if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
-            // static partition
-            if (tblSpec.partHandle != null) {
-              confirmedPartns.add(tblSpec.partHandle);
-            } else {
-              // partial partition spec has null partHandle
-              assert parseInfo.isNoScanAnalyzeCommand();
-              confirmedPartns.addAll(tblSpec.partitions);
-            }
-          } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
-            // dynamic partition
-            confirmedPartns.addAll(tblSpec.partitions);
-          }
+          Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
           if (confirmedPartns.size() > 0) {
             Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
             PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns, false);
@@ -174,24 +162,9 @@ public class GenMRTableScan1 implements 
       Task<? extends Serializable> currTask, QBParseInfo parseInfo, StatsWork statsWork,
       Task<StatsWork> statsTask) throws SemanticException {
     String aggregationKey = op.getConf().getStatsAggPrefix();
-    List<Path> inputPaths = new ArrayList<Path>();
-    switch (parseInfo.getTableSpec().specType) {
-    case TABLE_ONLY:
-      inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath());
-      break;
-    case STATIC_PARTITION:
-      Partition part = parseInfo.getTableSpec().partHandle;
-      try {
-        aggregationKey += Warehouse.makePartPath(part.getSpec());
-      } catch (MetaException e) {
-        throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(
-            part.getDataLocation().toString() + e.getMessage()));
-      }
-      inputPaths.add(part.getDataLocation());
-      break;
-    default:
-      assert false;
-    }
+    StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
+    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer);
+    aggregationKey = aggregationKeyBuffer.toString();
 
     // scan work
     PartialScanWork scanWork = new PartialScanWork(inputPaths);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Wed Feb 12 05:20:31 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimi
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -28,12 +29,14 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
+import java.lang.StringBuffer;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
@@ -60,14 +63,18 @@ import org.apache.hadoop.hive.ql.io.RCFi
 import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx;
 import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.QBJoinTree;
+import org.apache.hadoop.hive.ql.parse.QBParseInfo;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
@@ -1704,6 +1711,48 @@ public final class GenMapRedUtils {
     return dest;
   }
 
+  public static Set<Partition> getConfirmedPartitionsForScan(QBParseInfo parseInfo) {
+    Set<Partition> confirmedPartns = new HashSet<Partition>();
+    tableSpec tblSpec = parseInfo.getTableSpec();
+    if (tblSpec.specType == tableSpec.SpecType.STATIC_PARTITION) {
+      // static partition
+      if (tblSpec.partHandle != null) {
+        confirmedPartns.add(tblSpec.partHandle);
+      } else {
+        // partial partition spec has null partHandle
+        assert parseInfo.isNoScanAnalyzeCommand();
+        confirmedPartns.addAll(tblSpec.partitions);
+      }
+    } else if (tblSpec.specType == tableSpec.SpecType.DYNAMIC_PARTITION) {
+      // dynamic partition
+      confirmedPartns.addAll(tblSpec.partitions);
+    }
+    return confirmedPartns;
+  }
+
+  public static List<Path> getInputPathsForPartialScan(QBParseInfo parseInfo, StringBuffer aggregationKey) 
+    throws SemanticException {
+    List<Path> inputPaths = new ArrayList<Path>();
+    switch (parseInfo.getTableSpec().specType) {
+    case TABLE_ONLY:
+      inputPaths.add(parseInfo.getTableSpec().tableHandle.getPath());
+      break;
+    case STATIC_PARTITION:
+      Partition part = parseInfo.getTableSpec().partHandle;
+      try {
+        aggregationKey.append(Warehouse.makePartPath(part.getSpec()));
+      } catch (MetaException e) {
+        throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_AGGKEY.getMsg(
+            part.getDataLocation().toString() + e.getMessage()));
+      }
+      inputPaths.add(part.getDataLocation());
+      break;
+    default:
+      assert false;
+    }
+    return inputPaths;
+  }
+
   private GenMapRedUtils() {
     // prevent instantiation
   }
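getInputPathsForPartialScan both returns the input paths and extends the caller's aggregation key; since Java Strings are immutable, the key travels as a mutable StringBuffer out-parameter, which is why GenMRTableScan1 above wraps and unwraps the key around the call. A small sketch of that idiom, with made-up values:

    import java.util.ArrayList;
    import java.util.List;

    public class PartialScanKeySketch {
      // Sketch with made-up paths: returns the input paths while extending
      // the caller's aggregation key in place via the StringBuffer.
      static List<String> inputPathsForPartition(StringBuffer aggregationKey,
          String partPath, String dataLocation) {
        aggregationKey.append(partPath);
        List<String> inputPaths = new ArrayList<>();
        inputPaths.add(dataLocation);
        return inputPaths;
      }

      public static void main(String[] args) {
        StringBuffer aggKey = new StringBuffer("default.t/");
        List<String> paths = inputPathsForPartition(aggKey,
            "ds=2014-02-12/", "/warehouse/t/ds=2014-02-12");
        System.out.println(aggKey); // default.t/ds=2014-02-12/
        System.out.println(paths);  // [/warehouse/t/ds=2014-02-12]
      }
    }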

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java?rev=1567528&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java Wed Feb 12 05:20:31 2014
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.TezWork.EdgeType;
+
+/**
+ * GenTezUtils is a collection of shared helper methods to produce
+ * TezWork
+ */
+public class GenTezUtils {
+
+  static final private Log LOG = LogFactory.getLog(GenTezUtils.class.getName());
+
+  // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...)
+  private int sequenceNumber = 0;
+
+  // singleton
+  private static GenTezUtils utils;
+
+  public static GenTezUtils getUtils() {
+    if (utils == null) {
+      utils = new GenTezUtils();
+    }
+    return utils;
+  }
+
+  protected GenTezUtils() {
+  }
+
+  public void resetSequenceNumber() {
+    sequenceNumber = 0;
+  }
+
+  public ReduceWork createReduceWork(GenTezProcContext context, Operator<?> root, TezWork tezWork) {
+    assert !root.getParentOperators().isEmpty();
+    ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber));
+    LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
+    reduceWork.setReducer(root);
+    reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
+
+    // All parents should be reduce sinks. We pick the one we just walked
+    // to choose the number of reducers. In the join/union case they will
+    // all be -1. In sort/order case where it matters there will be only
+    // one parent.
+    assert context.parentOfRoot instanceof ReduceSinkOperator;
+    ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
+
+    reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
+
+    setupReduceSink(context, reduceWork, reduceSink);
+
+    tezWork.add(reduceWork);
+    tezWork.connect(
+        context.preceedingWork,
+        reduceWork, EdgeType.SIMPLE_EDGE);
+
+    return reduceWork;
+  }
+
+  protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork,
+      ReduceSinkOperator reduceSink) {
+
+    LOG.debug("Setting up reduce sink: " + reduceSink
+        + " with following reduce work: " + reduceWork.getName());
+
+    // need to fill in information about the key and value in the reducer
+    GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink);
+
+    // remember which parent belongs to which tag
+    reduceWork.getTagToInput().put(reduceSink.getConf().getTag(),
+         context.preceedingWork.getName());
+
+    // remember the output name of the reduce sink
+    reduceSink.getConf().setOutputName(reduceWork.getName());
+  }
+
+  public MapWork createMapWork(GenTezProcContext context, Operator<?> root,
+      TezWork tezWork, PrunedPartitionList partitions) throws SemanticException {
+    assert root.getParentOperators().isEmpty();
+    MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
+    LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
+
+    // map work starts with table scan operators
+    assert root instanceof TableScanOperator;
+    String alias = ((TableScanOperator)root).getConf().getAlias();
+
+    setupMapWork(mapWork, context, partitions, root, alias);
+
+    // add new item to the tez work
+    tezWork.add(mapWork);
+
+    return mapWork;
+  }
+
+  // this method's main use is to help unit testing this class
+  protected void setupMapWork(MapWork mapWork, GenTezProcContext context, 
+      PrunedPartitionList partitions, Operator<? extends OperatorDesc> root, 
+      String alias) throws SemanticException {
+    // All the setup is done in GenMapRedUtils
+    GenMapRedUtils.setMapWork(mapWork, context.parseContext,
+        context.inputs, partitions, root, alias, context.conf, false);
+  }
+}
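GenTezUtils is a lazily created singleton whose sequence number yields the vertex names ("Map 1", "Reducer 2", ...) for one compilation; TezCompiler resets it per query, as the hunk further below shows. A condensed sketch of that naming scheme, with a hypothetical class name:

    // Sketch only: a toy stand-in for the singleton/sequence-number scheme.
    public class VertexNamerSketch {
      private static VertexNamerSketch utils;
      private int sequenceNumber = 0;

      public static VertexNamerSketch getUtils() {
        if (utils == null) {
          utils = new VertexNamerSketch();
        }
        return utils;
      }

      public void resetSequenceNumber() {
        sequenceNumber = 0;
      }

      // Vertex names come from one shared counter: "Map 1", "Reducer 2", ...
      public String nextName(String prefix) {
        return prefix + " " + (++sequenceNumber);
      }

      public static void main(String[] args) {
        VertexNamerSketch u = VertexNamerSketch.getUtils();
        System.out.println(u.nextName("Map"));     // Map 1
        System.out.println(u.nextName("Reducer")); // Reducer 2
        u.resetSequenceNumber();                   // done once per compiled query
        System.out.println(u.nextName("Map"));     // Map 1 again
      }
    }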

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java Wed Feb 12 05:20:31 2014
@@ -49,8 +49,15 @@ public class GenTezWork implements NodeP
 
   static final private Log LOG = LogFactory.getLog(GenTezWork.class.getName());
 
-  // sequence number is used to name vertices (e.g.: Map 1, Reduce 14, ...)
-  private int sequenceNumber = 0;
+  // instance of shared utils
+  private GenTezUtils utils = null;
+
+  /**
+   * Constructor takes utils as parameter to facilitate testing
+   */
+  public GenTezWork(GenTezUtils utils) {
+    this.utils = utils;
+  }
 
   @Override
   public Object process(Node nd, Stack<Node> stack,
@@ -92,9 +99,9 @@ public class GenTezWork implements NodeP
     } else {
       // create a new vertex
       if (context.preceedingWork == null) {
-        work = createMapWork(context, root, tezWork);
+        work = utils.createMapWork(context, root, tezWork, null);
       } else {
-        work = createReduceWork(context, root, tezWork);
+        work = utils.createReduceWork(context, root, tezWork);
       }
       context.rootToWorkMap.put(root, work);
     }
@@ -186,74 +193,4 @@ public class GenTezWork implements NodeP
 
     return null;
   }
-
-  protected ReduceWork createReduceWork(GenTezProcContext context, Operator<?> root,
-      TezWork tezWork) {
-    assert !root.getParentOperators().isEmpty();
-    ReduceWork reduceWork = new ReduceWork("Reducer "+ (++sequenceNumber));
-    LOG.debug("Adding reduce work (" + reduceWork.getName() + ") for " + root);
-    reduceWork.setReducer(root);
-    reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
-
-    // All parents should be reduce sinks. We pick the one we just walked
-    // to choose the number of reducers. In the join/union case they will
-    // all be -1. In sort/order case where it matters there will be only
-    // one parent.
-    assert context.parentOfRoot instanceof ReduceSinkOperator;
-    ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
-
-    reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
-
-    setupReduceSink(context, reduceWork, reduceSink);
-
-    tezWork.add(reduceWork);
-    tezWork.connect(
-        context.preceedingWork,
-        reduceWork, EdgeType.SIMPLE_EDGE);
-
-    return reduceWork;
-  }
-
-  protected void setupReduceSink(GenTezProcContext context, ReduceWork reduceWork,
-      ReduceSinkOperator reduceSink) {
-
-    LOG.debug("Setting up reduce sink: " + reduceSink
-        + " with following reduce work: " + reduceWork.getName());
-
-    // need to fill in information about the key and value in the reducer
-    GenMapRedUtils.setKeyAndValueDesc(reduceWork, reduceSink);
-
-    // remember which parent belongs to which tag
-    reduceWork.getTagToInput().put(reduceSink.getConf().getTag(),
-         context.preceedingWork.getName());
-
-    // remember the output name of the reduce sink
-    reduceSink.getConf().setOutputName(reduceWork.getName());
-  }
-
-  protected MapWork createMapWork(GenTezProcContext context, Operator<?> root,
-      TezWork tezWork) throws SemanticException {
-    assert root.getParentOperators().isEmpty();
-    MapWork mapWork = new MapWork("Map "+ (++sequenceNumber));
-    LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
-
-    // map work starts with table scan operators
-    assert root instanceof TableScanOperator;
-    String alias = ((TableScanOperator)root).getConf().getAlias();
-
-    setupMapWork(mapWork, context, root, alias);
-
-    // add new item to the tez work
-    tezWork.add(mapWork);
-
-    return mapWork;
-  }
-
-  // this method's main use is to help unit testing this class
-  protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
-      Operator<? extends OperatorDesc> root, String alias) throws SemanticException {
-    // All the setup is done in GenMapRedUtils
-    GenMapRedUtils.setMapWork(mapWork, context.parseContext,
-        context.inputs, null, root, alias, context.conf, false);
-  }
 }

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java?rev=1567528&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java Wed Feb 12 05:20:31 2014
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.lang.StringBuffer;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.GenTezWork;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.QBParseInfo;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.StatsWork;
+
+/**
+ * ProcessAnalyzeTable sets up work for the several variants of analyze table
+ * (normal, no scan, partial scan.) The plan at this point will be a single
+ * table scan operator.
+ */
+public class ProcessAnalyzeTable implements NodeProcessor {
+
+  static final private Log LOG = LogFactory.getLog(ProcessAnalyzeTable.class.getName());
+
+  // shared plan utils for tez
+  private GenTezUtils utils = null;
+
+  /**
+   * Injecting the utils in the constructor facilitates testing
+   */
+  public ProcessAnalyzeTable(GenTezUtils utils) {
+    this.utils = utils;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object process(Node nd, Stack<Node> stack,
+      NodeProcessorCtx procContext, Object... nodeOutputs)
+      throws SemanticException {
+
+    GenTezProcContext context = (GenTezProcContext) procContext;
+    
+    TableScanOperator tableScan = (TableScanOperator) nd;
+
+    ParseContext parseContext = context.parseContext;
+    QB queryBlock = parseContext.getQB();
+    QBParseInfo parseInfo = parseContext.getQB().getParseInfo();
+    
+    if (parseInfo.isAnalyzeCommand()) {
+
+      assert tableScan.getChildOperators() == null
+        || tableScan.getChildOperators().size() == 0;
+
+      String alias = null;
+      for (String a: parseContext.getTopOps().keySet()) {
+        if (tableScan == parseContext.getTopOps().get(a)) {
+          alias = a;
+        }
+      }
+
+      assert alias != null;
+
+      TezWork tezWork = context.currentTask.getWork();
+
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
+      // The plan consists of a simple TezTask followed by a StatsTask.
+      // The Tez task is just a simple TableScanOperator
+      
+      StatsWork statsWork = new StatsWork(parseInfo.getTableSpec());
+      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+      statsWork.setSourceTask(context.currentTask);
+      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
+      context.currentTask.addDependentTask(statsTask);
+
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+      // The plan consists of a StatsTask only.
+      if (parseInfo.isNoScanAnalyzeCommand()) {
+        statsTask.setParentTasks(null);
+        statsWork.setNoScanAnalyzeCommand(true);
+        context.rootTasks.remove(context.currentTask);
+        context.rootTasks.add(statsTask);
+      }
+
+      // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
+      if (parseInfo.isPartialScanAnalyzeCommand()) {
+        handlePartialScanCommand(tableScan, parseContext, parseInfo, statsWork, context, statsTask);
+      }
+
+      // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list,
+      // and pass it to setTaskPlan as the last parameter
+      Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+      PrunedPartitionList partitions = null;
+      if (confirmedPartns.size() > 0) {
+        Table source = queryBlock.getMetaData().getTableForAlias(alias);
+        partitions = new PrunedPartitionList(source, confirmedPartns, false);
+      }
+
+      MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions);
+      w.setGatheringStats(true);
+
+      return true;
+    }
+
+    return null;
+  }
+
+  /**
+   * handle partial scan command.
+   *
+   * It is composed of PartialScanTask followed by StatsTask.
+   */
+  private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, 
+      QBParseInfo parseInfo, StatsWork statsWork, GenTezProcContext context,
+      Task<StatsWork> statsTask) throws SemanticException {
+
+    String aggregationKey = tableScan.getConf().getStatsAggPrefix();
+    StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
+    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(parseInfo, aggregationKeyBuffer);
+    aggregationKey = aggregationKeyBuffer.toString();
+    
+    // scan work
+    PartialScanWork scanWork = new PartialScanWork(inputPaths);
+    scanWork.setMapperCannotSpanPartns(true);
+    scanWork.setAggKey(aggregationKey);
+
+    // stats work
+    statsWork.setPartialScanAnalyzeCommand(true);
+
+    // partial scan task
+    DriverContext driverCxt = new DriverContext();
+    Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
+    partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
+    partialScanTask.setWork(scanWork);
+    statsWork.setSourceTask(partialScanTask);
+
+    // task dependency
+    context.rootTasks.remove(context.currentTask);
+    context.rootTasks.add(partialScanTask);
+    partialScanTask.addDependentTask(statsTask);
+  }
+}
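To summarize the three task layouts this processor builds: plain ANALYZE keeps TezTask followed by StatsTask; the noscan variant drops the scan and leaves StatsTask as the only root; partialscan replaces the root with PartialScanTask feeding StatsTask. A schematic sketch of that root-task rewiring (plain strings, not Hive's Task objects):

    import java.util.ArrayList;
    import java.util.List;

    public class AnalyzePlanSketch {
      // Schematic only: strings stand in for Hive tasks.
      static List<String> planFor(String variant) {
        List<String> rootTasks = new ArrayList<>();
        rootTasks.add("TezTask -> StatsTask");   // default ANALYZE ... COMPUTE STATISTICS
        if (variant.equals("noscan")) {
          rootTasks.clear();
          rootTasks.add("StatsTask");            // metadata only, no scan at all
        } else if (variant.equals("partialscan")) {
          rootTasks.clear();
          rootTasks.add("PartialScanTask -> StatsTask"); // partial scan replaces full scan
        }
        return rootTasks;
      }

      public static void main(String[] args) {
        System.out.println(planFor(""));            // [TezTask -> StatsTask]
        System.out.println(planFor("noscan"));      // [StatsTask]
        System.out.println(planFor("partialscan")); // [PartialScanTask -> StatsTask]
      }
    }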

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java Wed Feb 12 05:20:31 2014
@@ -35,8 +35,7 @@ public class TaskCompilerFactory {
    * into executable units.
    */
   public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) {
-    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
-        && !parseContext.getQB().getParseInfo().isAnalyzeCommand()) {
+    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       return new TezCompiler();
     } else {
       return new MapReduceCompiler();
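Before this change, ANALYZE commands were special-cased back to the MapReduce compiler even with the engine set to tez; now that ProcessAnalyzeTable handles them, the engine setting alone picks the compiler. A toy sketch of the simplified dispatch (stub classes, not Hive's):

    public class CompilerDispatchSketch {
      // Stubs only, standing in for Hive's compiler classes.
      interface TaskCompiler {}
      static class TezCompiler implements TaskCompiler {}
      static class MapReduceCompiler implements TaskCompiler {}

      // After the patch, the execution-engine setting alone decides.
      static TaskCompiler getCompiler(String executionEngine) {
        return executionEngine.equals("tez")
            ? new TezCompiler() : new MapReduceCompiler();
      }

      public static void main(String[] args) {
        System.out.println(getCompiler("tez").getClass().getSimpleName()); // TezCompiler
        System.out.println(getCompiler("mr").getClass().getSimpleName());  // MapReduceCompiler
      }
    }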

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Wed Feb 12 05:20:31 2014
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Jo
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -109,8 +110,10 @@ public class TezCompiler extends TaskCom
       List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
       throws SemanticException {
 
+    GenTezUtils.getUtils().resetSequenceNumber();
+
     ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
-    GenTezWork genTezWork = new GenTezWork();
+    GenTezWork genTezWork = new GenTezWork(GenTezUtils.getUtils());
 
     GenTezProcContext procCtx = new GenTezProcContext(
         conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
@@ -131,6 +134,10 @@ public class TezCompiler extends TaskCom
         FileSinkOperator.getOperatorName() + "%"),
         new CompositeProcessor(new FileSinkProcessor(), genTezWork));
 
+    opRules.put(new RuleRegExp("Handle Potential Analyze Command",
+        TableScanOperator.getOperatorName() + "%"),
+        new ProcessAnalyzeTable(GenTezUtils.getUtils()));
+
     opRules.put(new RuleRegExp("Bail on Union",
         UnionOperator.getOperatorName() + "%"), new NodeProcessor()
     {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregatorTez.java Wed Feb 12 05:20:31 2014
@@ -57,17 +57,21 @@ public class CounterStatsAggregatorTez i
 
   @Override
   public String aggregateStats(String keyPrefix, String statType) {
-    if (delegate) {
-      return mrAggregator.aggregateStats(keyPrefix, statType);
-    }
+    String result;
 
-    long value = 0;
-    for (String groupName : counters.getGroupNames()) {
-      if (groupName.startsWith(keyPrefix)) {
-        value += counters.getGroup(groupName).findCounter(statType).getValue();
+    if (delegate) {
+      result = mrAggregator.aggregateStats(keyPrefix, statType);
+    } else {
+      long value = 0;
+      for (String groupName : counters.getGroupNames()) {
+        if (groupName.startsWith(keyPrefix)) {
+          value += counters.getGroup(groupName).findCounter(statType).getValue();
+        }
       }
+      result = String.valueOf(value);
     }
-    return String.valueOf(value);
+    LOG.info("Counter based stats for ("+keyPrefix+") are: "+result);
+    return result;
   }
 
   @Override
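The non-delegating branch above sums one named counter across every group whose name starts with the key prefix (one group per table/partition output), then logs the result. The same reduction as a stand-alone sketch over plain maps in place of Tez counters:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class PrefixSumSketch {
      // Sum one named stat across all counter groups matching the prefix,
      // mirroring the non-delegating branch of aggregateStats above.
      static String aggregateStats(Map<String, Map<String, Long>> counters,
          String keyPrefix, String statType) {
        long value = 0;
        for (Map.Entry<String, Map<String, Long>> group : counters.entrySet()) {
          if (group.getKey().startsWith(keyPrefix)) {
            value += group.getValue().getOrDefault(statType, 0L);
          }
        }
        return String.valueOf(value);
      }

      public static void main(String[] args) {
        // Made-up prefixes and values for illustration.
        Map<String, Map<String, Long>> counters = new LinkedHashMap<>();
        Map<String, Long> p1 = new LinkedHashMap<>();
        p1.put("numRows", 10L);
        counters.put("default.t/ds=1/", p1);
        Map<String, Long> p2 = new LinkedHashMap<>();
        p2.put("numRows", 15L);
        counters.put("default.t/ds=2/", p2);
        System.out.println(aggregateStats(counters, "default.t/", "numRows")); // 25
      }
    }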

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java Wed Feb 12 05:20:31 2014
@@ -76,17 +76,19 @@ public class TestGenTezWork {
         (Set<ReadEntity>)Collections.EMPTY_SET,
         (Set<WriteEntity>)Collections.EMPTY_SET);
 
-    proc = new GenTezWork() {
+    proc = new GenTezWork(new GenTezUtils() {
       @Override
-      protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
-          Operator<? extends OperatorDesc> root, String alias) throws SemanticException {
+        protected void setupMapWork(MapWork mapWork, GenTezProcContext context, 
+          PrunedPartitionList partitions, Operator<? extends OperatorDesc> root, String alias) 
+        throws SemanticException {
+        
         LinkedHashMap<String, Operator<? extends OperatorDesc>> map
           = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
         map.put("foo", root);
         mapWork.setAliasToWork(map);
         return;
       }
-    };
+    });
 
     fs = new FileSinkOperator();
     fs.setConf(new FileSinkDesc());
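The test now injects an anonymous GenTezUtils subclass so the protected setupMapWork hook can be stubbed without touching a metastore. That seam-for-testing pattern in miniature (hypothetical names):

    public class TestSeamSketch {
      static class Utils {
        // Protected hook: production does real setup; tests override it.
        protected String setupMapWork(String alias) {
          return "real setup for " + alias;
        }
      }

      public static void main(String[] args) {
        Utils stubbed = new Utils() {
          @Override
          protected String setupMapWork(String alias) {
            return "stubbed " + alias;
          }
        };
        System.out.println(stubbed.setupMapWork("foo")); // stubbed foo
      }
    }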

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out Wed Feb 12 05:20:31 2014
@@ -201,26 +201,26 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s
-                  Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
-                    Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
-                        Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: string)
         Reducer 2 
             Reduce Operator Tree:
               Extract
-                Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out Wed Feb 12 05:20:31 2014
@@ -226,26 +226,26 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s
-                  Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
-                    Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
-                        Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
         Reducer 2 
             Reduce Operator Tree:
               Extract
-                Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out Wed Feb 12 05:20:31 2014
@@ -202,17 +202,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s
-                  Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
-                    Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.TextInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/ctas.q.out Wed Feb 12 05:20:31 2014
@@ -374,8 +374,8 @@ Table Type:         	MANAGED_TABLE      
 Table Parameters:	 	 
 	COLUMN_STATS_ACCURATE	true                
 	numFiles            	1                   
-	numRows             	0                   
-	rawDataSize         	0                   
+	numRows             	10                  
+	rawDataSize         	96                  
 	totalSize           	106                 
 #### A masked pattern was here ####
 	 	 
@@ -540,8 +540,8 @@ Table Type:         	MANAGED_TABLE      
 Table Parameters:	 	 
 	COLUMN_STATS_ACCURATE	true                
 	numFiles            	1                   
-	numRows             	0                   
-	rawDataSize         	0                   
+	numRows             	10                  
+	rawDataSize         	120                 
 	totalSize           	199                 
 #### A masked pattern was here ####
 	 	 
@@ -603,8 +603,8 @@ Table Type:         	MANAGED_TABLE      
 Table Parameters:	 	 
 	COLUMN_STATS_ACCURATE	true                
 	numFiles            	1                   
-	numRows             	0                   
-	rawDataSize         	0                   
+	numRows             	10                  
+	rawDataSize         	120                 
 	totalSize           	199                 
 #### A masked pattern was here ####
 	 	 
@@ -791,8 +791,8 @@ Table Type:         	MANAGED_TABLE      
 Table Parameters:	 	 
 	COLUMN_STATS_ACCURATE	true                
 	numFiles            	1                   
-	numRows             	0                   
-	rawDataSize         	0                   
+	numRows             	10                  
+	rawDataSize         	96                  
 	totalSize           	106                 
 #### A masked pattern was here ####
 	 	 

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out Wed Feb 12 05:20:31 2014
@@ -201,26 +201,26 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s
-                  Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
-                    Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
-                        Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: int), _col1 (type: string)
         Reducer 2 
             Reduce Operator Tree:
               Extract
-                Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 27 Data size: 2853 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/filter_join_breaktask.q.out Wed Feb 12 05:20:31 2014
@@ -144,17 +144,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: f
-                  Statistics: Num rows: 59 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 30 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE
                       tag: 0
                       value expressions: key (type: int)
             Path -> Alias:
@@ -174,9 +174,9 @@ STAGE PLANS:
 #### A masked pattern was here ####
                     name default.filter_join_breaktask
                     numFiles 1
-                    numRows 0
+                    numRows 25
                     partition_columns ds
-                    rawDataSize 0
+                    rawDataSize 211
                     serialization.ddl struct filter_join_breaktask { i32 key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -206,17 +206,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: g
-                  Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (value <> '') (type: boolean)
-                    Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: value (type: string)
                       sort order: +
                       Map-reduce partition columns: value (type: string)
-                      Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
                       tag: 1
                       value expressions: value (type: string)
             Path -> Alias:
@@ -236,9 +236,9 @@ STAGE PLANS:
 #### A masked pattern was here ####
                     name default.filter_join_breaktask
                     numFiles 1
-                    numRows 0
+                    numRows 25
                     partition_columns ds
-                    rawDataSize 0
+                    rawDataSize 211
                     serialization.ddl struct filter_join_breaktask { i32 key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -268,17 +268,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: m
-                  Statistics: Num rows: 2 Data size: 236 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 1 Data size: 118 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE
                       tag: 1
                       value expressions: value (type: string)
             Path -> Alias:
@@ -298,9 +298,9 @@ STAGE PLANS:
 #### A masked pattern was here ####
                     name default.filter_join_breaktask
                     numFiles 1
-                    numRows 0
+                    numRows 25
                     partition_columns ds
-                    rawDataSize 0
+                    rawDataSize 211
                     serialization.ddl struct filter_join_breaktask { i32 key, string value}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -336,12 +336,12 @@ STAGE PLANS:
                   0 {VALUE._col0}
                   1 {VALUE._col1}
                 outputColumnNames: _col0, _col6
-                Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col6 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col6 (type: string)
-                  Statistics: Num rows: 33 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE
                   tag: 0
                   value expressions: _col0 (type: int)
         Reducer 3 
@@ -354,17 +354,17 @@ STAGE PLANS:
                   0 {VALUE._col0}
                   1 {VALUE._col1}
                 outputColumnNames: _col0, _col11
-                Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col11 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     GlobalTableId: 0
 #### A masked pattern was here ####
                     NumFilesPerFileSink: 1
-                    Statistics: Num rows: 36 Data size: 145 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 27 Data size: 232 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/merge1.q.out Wed Feb 12 05:20:31 2014
@@ -541,14 +541,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_src
-                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -665,14 +665,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_src
-                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/merge2.q.out Wed Feb 12 05:20:31 2014
@@ -541,14 +541,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_src
-                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -665,14 +665,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: test_src
-                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/metadata_only_queries.q.out Wed Feb 12 05:20:31 2014
@@ -295,11 +295,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: stats_tbl
-                  Statistics: Num rows: 4731 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
                     outputColumnNames: s, bo, bin, si, i, b
-                    Statistics: Num rows: 4731 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
                       mode: hash
@@ -396,11 +396,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: stats_tbl_part
-                  Statistics: Num rows: 4491 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint)
                     outputColumnNames: s, bo, bin, si, i, b
-                    Statistics: Num rows: 4491 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b)
                       mode: hash
@@ -698,63 +698,18 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
   Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Tez
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: stats_tbl
-                  Statistics: Num rows: 4486 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double)
-                    outputColumnNames: s, bo, bin, si, i, b, f, d
-                    Statistics: Num rows: 4486 Data size: 1040907 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
-        Reducer 2 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), max(VALUE._col10), min(VALUE._col11)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                  Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 1
 
 PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 PREHOOK: type: QUERY
-PREHOOK: Input: default@stats_tbl
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@stats_tbl
 #### A masked pattern was here ####
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -800,7 +755,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9999	9999	1999.8000000003176	9999	9999	9999	9999	9999	65791	4294967296	99.98	0.01
+9999	9999	1999.8	9999	9999	9999	9999	9999	65791	0	99.9800033569336	0.0
 PREHOOK: query: explain 
 select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 PREHOOK: type: QUERY
@@ -852,69 +807,18 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
   Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Tez
-      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE)
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: stats_tbl_part
-                  Statistics: Num rows: 4258 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double)
-                    outputColumnNames: s, bo, bin, si, i, b, f, d
-                    Statistics: Num rows: 4258 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: count(), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d)
-                      mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
-        Reducer 2 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), max(VALUE._col10), min(VALUE._col11)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint), _col10 (type: float), _col11 (type: double)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                  Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 1
 
 PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 PREHOOK: type: QUERY
-PREHOOK: Input: default@stats_tbl_part
-PREHOOK: Input: default@stats_tbl_part@dt=2010
-PREHOOK: Input: default@stats_tbl_part@dt=2011
-PREHOOK: Input: default@stats_tbl_part@dt=2012
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@stats_tbl_part
-POSTHOOK: Input: default@stats_tbl_part@dt=2010
-POSTHOOK: Input: default@stats_tbl_part@dt=2011
-POSTHOOK: Input: default@stats_tbl_part@dt=2012
 #### A masked pattern was here ####
 POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ]
@@ -960,7 +864,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTIT
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-9489	9489	1897.8000000002944	9489	9489	9489	9489	9489	65791	4294967296	99.98	0.01
+9489	9489	1897.8	9489	9489	9489	9489	9489	65791	0	99.9800033569336	0.0
 PREHOOK: query: explain select count(ts) from stats_tbl_part
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(ts) from stats_tbl_part
@@ -1023,11 +927,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: stats_tbl_part
-                  Statistics: Num rows: 24705 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ts (type: timestamp)
                     outputColumnNames: ts
-                    Statistics: Num rows: 24705 Data size: 988274 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(ts)
                       mode: hash

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out?rev=1567528&r1=1567527&r2=1567528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out Wed Feb 12 05:20:31 2014
@@ -766,6 +766,7 @@ sum(p_retailprice) as s 
 from part 
 group by p_mfgr, p_brand
 PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@part
 POSTHOOK: query: -- 16. testViewAsTableInputToPTF
 create view IF NOT EXISTS mfgr_price_view as 
 select p_mfgr, p_brand, 
@@ -773,6 +774,7 @@ sum(p_retailprice) as s 
 from part 
 group by p_mfgr, p_brand
 POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@part
 POSTHOOK: Output: default@mfgr_price_view
 PREHOOK: query: select p_mfgr, p_brand, s, 
 sum(s) over w1  as s1