You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/03 18:37:27 UTC

hive git commit: HIVE-20005 : acid_table_stats, acid_no_buckets, etc - query result change on the branch (Steve Yeom)

Repository: hive
Updated Branches:
  refs/heads/master-txnstats a47a80fed -> eb261cbf5


HIVE-20005 : acid_table_stats, acid_no_buckets, etc - query result change on the branch (Steve Yeom)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eb261cbf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eb261cbf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eb261cbf

Branch: refs/heads/master-txnstats
Commit: eb261cbf526e275735f15a3658c188a3b0819b82
Parents: a47a80f
Author: sergey <se...@apache.org>
Authored: Tue Jul 3 11:36:44 2018 -0700
Committer: sergey <se...@apache.org>
Committed: Tue Jul 3 11:36:44 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |   2 +-
 .../clientpositive/acid_table_stats.q.out       | 130 ++-----------------
 2 files changed, 13 insertions(+), 119 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/eb261cbf/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 267d602..9593975 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -1208,7 +1208,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
             } else if (prevFsp.updaters[0] != null) {
               stats = prevFsp.updaters[0].getStats();
             }
-            if (stats != null) {
+            if (stats != null && !conf.isFullAcidTable()) {
                 prevFsp.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
                 prevFsp.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount());
             }

http://git-wip-us.apache.org/repos/asf/hive/blob/eb261cbf/ql/src/test/results/clientpositive/acid_table_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out
index 841a5a4..fb064f8 100644
--- a/ql/src/test/results/clientpositive/acid_table_stats.q.out
+++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out
@@ -93,8 +93,9 @@ Database:           	default
 Table:              	acid                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	2                   
-	numRows             	2000                
+	numRows             	1000                
 	rawDataSize         	0                   
 	totalSize           	4063                
 #### A masked pattern was here ####
@@ -125,58 +126,22 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: acid
-            filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              Statistics: Num rows: 2000 Data size: 40630 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count()
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: bigint)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 1
       Processor Tree:
         ListSink
 
 PREHOOK: query: select count(*) from acid where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 1000
 PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics
@@ -209,7 +174,7 @@ Database:           	default
 Table:              	acid                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	2                   
 	numRows             	1000                
 	rawDataSize         	208000              
@@ -293,58 +258,22 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: acid
-            filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              Statistics: Num rows: 1000 Data size: 208000 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count()
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: bigint)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 1
       Processor Tree:
         ListSink
 
 PREHOOK: query: select count(*) from acid where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 1000
 PREHOOK: query: insert into table acid partition(ds)  select key,value,ds from srcpart
@@ -388,8 +317,9 @@ Database:           	default
 Table:              	acid                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	4                   
-	numRows             	3000                
+	numRows             	2000                
 	rawDataSize         	208000              
 	totalSize           	8118                
 #### A masked pattern was here ####
@@ -434,7 +364,7 @@ Database:           	default
 Table:              	acid                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	4                   
 	numRows             	2000                
 	rawDataSize         	416000              
@@ -456,58 +386,22 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: acid
-            filterExpr: (ds = '2008-04-08') (type: boolean)
-            Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              Statistics: Num rows: 2000 Data size: 416000 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                aggregations: count()
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: bigint)
-      Execution mode: vectorized
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: count(VALUE._col0)
-          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
-      limit: -1
+      limit: 1
       Processor Tree:
         ListSink
 
 PREHOOK: query: select count(*) from acid where ds='2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acid
-PREHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from acid where ds='2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid
-POSTHOOK: Input: default@acid@ds=2008-04-08
 #### A masked pattern was here ####
 2000
 PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns