You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2015/08/14 08:16:51 UTC
[7/7] hive git commit: backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;' (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
backport HIVE-9560: When hive.stats.collect.rawdatasize=true, 'rawDataSize' for an ORC table will result in value '0' after running 'analyze table TABLE_NAME compute statistics;' (Prasanth Jayachandran reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9eb95813
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9eb95813
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9eb95813
Branch: refs/heads/branch-1.0
Commit: 9eb95813a0b58601642fe9293d7cea8cbb0a2215
Parents: 301de83
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Aug 13 23:16:32 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Aug 13 23:16:32 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/StatsNoJobTask.java | 8 +-
.../hive/ql/optimizer/GenMRTableScan1.java | 17 +-
.../hive/ql/parse/ProcessAnalyzeTable.java | 17 +-
.../hadoop/hive/ql/plan/StatsNoJobWork.java | 10 +
.../test/queries/clientpositive/orc_analyze.q | 28 +-
.../clientpositive/annotate_stats_part.q.out | 52 +-
.../clientpositive/annotate_stats_table.q.out | 20 +-
.../results/clientpositive/limit_pushdown.q.out | 48 +-
.../results/clientpositive/orc_analyze.q.out | 998 ++++++++++++++++---
.../clientpositive/tez/limit_pushdown.q.out | 48 +-
.../clientpositive/tez/orc_analyze.q.out | 998 ++++++++++++++++---
.../clientpositive/tez/vector_char_simple.q.out | 16 +-
.../tez/vector_left_outer_join.q.out | 16 +-
.../tez/vector_varchar_simple.q.out | 16 +-
.../clientpositive/tez/vectorization_0.q.out | 42 +-
.../clientpositive/tez/vectorization_13.q.out | 36 +-
.../clientpositive/tez/vectorization_14.q.out | 20 +-
.../clientpositive/tez/vectorization_15.q.out | 20 +-
.../clientpositive/tez/vectorization_16.q.out | 16 +-
.../clientpositive/tez/vectorization_7.q.out | 20 +-
.../clientpositive/tez/vectorization_8.q.out | 20 +-
.../clientpositive/tez/vectorization_9.q.out | 16 +-
.../clientpositive/tez/vectorization_div0.q.out | 28 +-
.../tez/vectorization_limit.q.out | 84 +-
.../tez/vectorization_pushdown.q.out | 6 +-
.../tez/vectorization_short_regress.q.out | 152 +--
.../tez/vectorized_distinct_gby.q.out | 8 +-
.../clientpositive/tez/vectorized_mapjoin.q.out | 14 +-
.../tez/vectorized_nested_mapjoin.q.out | 26 +-
.../tez/vectorized_shufflejoin.q.out | 16 +-
.../clientpositive/vector_char_simple.q.out | 16 +-
.../clientpositive/vector_coalesce.q.out | 40 +-
.../clientpositive/vector_decimal_cast.q.out | 10 +-
.../results/clientpositive/vector_elt.q.out | 12 +-
.../results/clientpositive/vector_if_expr.q.out | 12 +-
.../clientpositive/vector_left_outer_join.q.out | 12 +-
.../clientpositive/vector_varchar_simple.q.out | 16 +-
.../clientpositive/vectorization_0.q.out | 42 +-
.../clientpositive/vectorization_13.q.out | 36 +-
.../clientpositive/vectorization_14.q.out | 20 +-
.../clientpositive/vectorization_15.q.out | 20 +-
.../clientpositive/vectorization_16.q.out | 16 +-
.../clientpositive/vectorization_7.q.out | 20 +-
.../clientpositive/vectorization_8.q.out | 20 +-
.../clientpositive/vectorization_9.q.out | 16 +-
.../clientpositive/vectorization_div0.q.out | 36 +-
.../clientpositive/vectorization_limit.q.out | 94 +-
.../clientpositive/vectorization_pushdown.q.out | 6 +-
.../vectorization_short_regress.q.out | 152 +--
.../clientpositive/vectorized_case.q.out | 8 +-
.../clientpositive/vectorized_casts.q.out | 8 +-
.../vectorized_distinct_gby.q.out | 8 +-
.../clientpositive/vectorized_mapjoin.q.out | 12 +-
.../clientpositive/vectorized_math_funcs.q.out | 8 +-
.../vectorized_nested_mapjoin.q.out | 22 +-
.../clientpositive/vectorized_shufflejoin.q.out | 16 +-
.../vectorized_string_funcs.q.out | 8 +-
.../clientpositive/windowing_streaming.q.out | 14 +-
58 files changed, 2499 insertions(+), 1017 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
index f089964..868cf04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec;
import java.io.Serializable;
+import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentMap;
@@ -223,7 +224,12 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
int ret = 0;
try {
- List<Partition> partitions = getPartitionsList();
+ Collection<Partition> partitions = null;
+ if (work.getPrunedPartitionList() == null) {
+ partitions = getPartitionsList();
+ } else {
+ partitions = work.getPrunedPartitionList().getPartitions();
+ }
// non-partitioned table
if (partitions == null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
index 7f574dc..d0f8b6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
@@ -90,16 +90,25 @@ public class GenMRTableScan1 implements NodeProcessor {
QBParseInfo parseInfo = parseCtx.getQB().getParseInfo();
if (parseInfo.isAnalyzeCommand()) {
- boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
- boolean noScan = parseInfo.isNoScanAnalyzeCommand();
- if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+ if (inputFormat.equals(OrcInputFormat.class)) {
+ // For ORC, all the following statements are the same
+ // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
// There will not be any MR or Tez job above this task
StatsNoJobWork snjWork = new StatsNoJobWork(parseCtx.getQB().getParseInfo().getTableSpec());
snjWork.setStatsReliable(parseCtx.getConf().getBoolVar(
HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+ // If partition is specified, get pruned partition list
+ Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+ if (confirmedParts.size() > 0) {
+ Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
+ List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+ PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
+ partCols, false);
+ snjWork.setPrunedPartitionList(partList);
+ }
Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseCtx.getConf());
ctx.setCurrTask(snjTask);
ctx.setCurrTopOp(null);
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index 9fcc1b2..72a843f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -95,16 +95,25 @@ public class ProcessAnalyzeTable implements NodeProcessor {
assert alias != null;
TezWork tezWork = context.currentTask.getWork();
- boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
- boolean noScan = parseInfo.isNoScanAnalyzeCommand();
- if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+ if (inputFormat.equals(OrcInputFormat.class)) {
+ // For ORC, all the following statements are the same
+ // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
// There will not be any Tez job above this task
StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec());
snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+ // If partition is specified, get pruned partition list
+ Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+ if (confirmedParts.size() > 0) {
+ Table source = parseContext.getQB().getMetaData().getTableForAlias(alias);
+ List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+ PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
+ partCols, false);
+ snjWork.setPrunedPartitionList(partList);
+ }
Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
snjTask.setParentTasks(null);
context.rootTasks.remove(context.currentTask);
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
index 5487836..3e5a607 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
/**
* Client-side stats aggregator task.
@@ -31,6 +32,7 @@ public class StatsNoJobWork implements Serializable {
private tableSpec tableSpecs;
private boolean statsReliable;
+ private PrunedPartitionList prunedPartitionList;
public StatsNoJobWork() {
}
@@ -54,4 +56,12 @@ public class StatsNoJobWork implements Serializable {
public void setStatsReliable(boolean statsReliable) {
this.statsReliable = statsReliable;
}
+
+ public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) {
+ this.prunedPartitionList = prunedPartitionList;
+ }
+
+ public PrunedPartitionList getPrunedPartitionList() {
+ return prunedPartitionList;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/queries/clientpositive/orc_analyze.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_analyze.q b/ql/src/test/queries/clientpositive/orc_analyze.q
index 3621c7a..bd22e6f 100644
--- a/ql/src/test/queries/clientpositive/orc_analyze.q
+++ b/ql/src/test/queries/clientpositive/orc_analyze.q
@@ -30,8 +30,13 @@ STORED AS orc;
INSERT OVERWRITE TABLE orc_create_people SELECT * FROM orc_create_people_staging ORDER BY id;
set hive.stats.autogather = true;
+analyze table orc_create_people compute statistics;
+desc formatted orc_create_people;
+
analyze table orc_create_people compute statistics partialscan;
+desc formatted orc_create_people;
+analyze table orc_create_people compute statistics noscan;
desc formatted orc_create_people;
drop table orc_create_people;
@@ -70,8 +75,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state)
SELECT * FROM orc_create_people_staging ORDER BY id;
set hive.stats.autogather = true;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+
analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+analyze table orc_create_people partition(state) compute statistics noscan;
desc formatted orc_create_people partition(state="Ca");
desc formatted orc_create_people partition(state="Or");
@@ -116,8 +128,15 @@ INSERT OVERWRITE TABLE orc_create_people PARTITION (state)
SELECT * FROM orc_create_people_staging ORDER BY id;
set hive.stats.autogather = true;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+
analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="Or");
+analyze table orc_create_people partition(state) compute statistics noscan;
desc formatted orc_create_people partition(state="Ca");
desc formatted orc_create_people partition(state="Or");
@@ -174,8 +193,15 @@ ALTER TABLE orc_create_people SET SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSer
ALTER TABLE orc_create_people SET FILEFORMAT ORC;
set hive.stats.autogather = true;
-analyze table orc_create_people partition(state) compute statistics noscan;
+analyze table orc_create_people partition(state) compute statistics;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="OH");
+analyze table orc_create_people partition(state) compute statistics partialscan;
+desc formatted orc_create_people partition(state="Ca");
+desc formatted orc_create_people partition(state="OH");
+
+analyze table orc_create_people partition(state) compute statistics noscan;
desc formatted orc_create_people partition(state="Ca");
desc formatted orc_create_people partition(state="OH");
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index fb3c17b..d8089e3 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -109,14 +109,12 @@ PREHOOK: query: -- partition level analyze statistics for specific parition
analyze table loc_orc partition(year='2001') compute statistics
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_orc
-PREHOOK: Input: default@loc_orc@year=2001
PREHOOK: Output: default@loc_orc
PREHOOK: Output: default@loc_orc@year=2001
POSTHOOK: query: -- partition level analyze statistics for specific parition
analyze table loc_orc partition(year='2001') compute statistics
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
-POSTHOOK: Input: default@loc_orc@year=2001
POSTHOOK: Output: default@loc_orc
POSTHOOK: Output: default@loc_orc@year=2001
PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE
@@ -158,11 +156,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 9 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -181,19 +179,17 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '2001' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- partition level analyze statistics for all partitions
analyze table loc_orc partition(year) compute statistics
PREHOOK: type: QUERY
PREHOOK: Input: default@loc_orc
-PREHOOK: Input: default@loc_orc@year=2001
-PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
PREHOOK: Output: default@loc_orc
PREHOOK: Output: default@loc_orc@year=2001
PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
@@ -201,8 +197,6 @@ POSTHOOK: query: -- partition level analyze statistics for all partitions
analyze table loc_orc partition(year) compute statistics
POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
-POSTHOOK: Input: default@loc_orc@year=2001
-POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Output: default@loc_orc
POSTHOOK: Output: default@loc_orc@year=2001
POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__
@@ -222,11 +216,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -245,11 +239,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -268,11 +262,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- both partitions will be pruned
@@ -331,11 +325,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: zip (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
@@ -354,7 +348,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string)
outputColumnNames: _col0
@@ -377,7 +371,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: string)
outputColumnNames: _col0
@@ -402,7 +396,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
@@ -425,7 +419,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
@@ -448,11 +442,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 325 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
@@ -471,11 +465,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 727 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- This is to test filter expression evaluation on partition column
@@ -496,7 +490,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -532,7 +526,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -568,7 +562,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 402 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 678 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/annotate_stats_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
index a74d85c..292ee33 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out
@@ -122,11 +122,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- column level partial statistics
@@ -155,11 +155,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- all selected columns have statistics
@@ -180,7 +180,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: deptid (type: int)
outputColumnNames: _col0
@@ -213,11 +213,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE
@@ -236,7 +236,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: lastname (type: string)
outputColumnNames: _col0
@@ -259,7 +259,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: deptid (type: int)
outputColumnNames: _col0
@@ -282,7 +282,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: emp_orc
- Statistics: Num rows: 48 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 48 Data size: 4512 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: lastname (type: string), deptid (type: int)
outputColumnNames: _col0, _col1
http://git-wip-us.apache.org/repos/asf/hive/blob/9eb95813/ql/src/test/results/clientpositive/limit_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/limit_pushdown.q.out b/ql/src/test/results/clientpositive/limit_pushdown.q.out
index 4abef8c..4703478 100644
--- a/ql/src/test/results/clientpositive/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/limit_pushdown.q.out
@@ -360,38 +360,38 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdouble (type: double)
outputColumnNames: cdouble
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
Map-reduce partition columns: _col0 (type: double)
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -447,22 +447,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double)
outputColumnNames: ctinyint, cdouble
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT cdouble)
keys: ctinyint (type: tinyint), cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Reduce Operator Tree:
Group By Operator
@@ -470,17 +470,17 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -538,22 +538,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
outputColumnNames: ctinyint, cstring1, cstring2
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2)
keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Reduce Operator Tree:
Group By Operator
@@ -561,17 +561,17 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), _col1 (type: bigint), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat