You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/03 18:37:27 UTC
hive git commit: HIVE-20005 : acid_table_stats, acid_no_buckets, etc - query result change on the branch (Steve Yeom)
Repository: hive
Updated Branches:
refs/heads/master-txnstats a47a80fed -> eb261cbf5
HIVE-20005 : acid_table_stats, acid_no_buckets, etc - query result change on the branch (Steve Yeom)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eb261cbf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eb261cbf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eb261cbf
Branch: refs/heads/master-txnstats
Commit: eb261cbf526e275735f15a3658c188a3b0819b82
Parents: a47a80f
Author: sergey <se...@apache.org>
Authored: Tue Jul 3 11:36:44 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue Jul 3 11:36:44 2018 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FileSinkOperator.java | 2 +-
.../clientpositive/acid_table_stats.q.out | 130 ++-----------------
2 files changed, 13 insertions(+), 119 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/eb261cbf/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 267d602..9593975 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -1208,7 +1208,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
} else if (prevFsp.updaters[0] != null) {
stats = prevFsp.updaters[0].getStats();
}
- if (stats != null) {
+ if (stats != null && !conf.isFullAcidTable()) {
prevFsp.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
prevFsp.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/eb261cbf/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index 841a5a4..fb064f8 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -93,8 +93,9 @@ Database: default
Table: acid
#### A masked pattern was here ####
Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
- numRows 2000
+ numRows 1000
rawDataSize 0
totalSize 4063
#### A masked pattern was here ####
@@ -125,58 +126,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: acid
- filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
PREHOOK: query: select count(*) from acid where ds='2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
1000
PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
@@ -209,7 +174,7 @@ Database: default
Table: acid
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 2
numRows 1000
rawDataSize 208000
@@ -293,58 +258,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: acid
- filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
PREHOOK: query: select count(*) from acid where ds='2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
1000
PREHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart
@@ -388,8 +317,9 @@ Database: default
Table: acid
#### A masked pattern was here ####
Partition Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
- numRows 3000
+ numRows 2000
rawDataSize 208000
totalSize 8118
#### A masked pattern was here ####
@@ -434,7 +364,7 @@ Database: default
Table: acid
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 2000
rawDataSize 416000
@@ -456,58 +386,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: acid
- filterExpr: (ds = '2008-04-08') (type: boolean)
- Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
PREHOOK: query: select count(*) from acid where ds='2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
#### A masked pattern was here ####
2000
PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns