You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2015/10/06 03:20:51 UTC

hive git commit: backport HIVE-11301: thrift metastore issue when getting stats results in disconnect (Pengcheng Xiong, reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/branch-1.0 2414c350f -> f1939cb5a


backport HIVE-11301: thrift metastore issue when getting stats results in disconnect (Pengcheng Xiong, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1939cb5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1939cb5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1939cb5

Branch: refs/heads/branch-1.0
Commit: f1939cb5ad3e0a10736d986f3ed47b5577da2ef9
Parents: 2414c35
Author: Pengcheng Xiong <px...@apache.org>
Authored: Mon Oct 5 18:20:38 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Mon Oct 5 18:20:38 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  10 +-
 .../test/queries/clientpositive/stats_ppr_all.q |  24 ++
 .../results/clientpositive/stats_ppr_all.q.out  | 300 +++++++++++++++++++
 3 files changed, 332 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b0bd8ce..26cf56d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -243,8 +243,14 @@ public class StatsUtils {
         }
         Map<String, String> colToTabAlias = new HashMap<String, String>();
         neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
-        AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
-            neededColumns, partNames);
+        AggrStats aggrStats = null;
+        // Only query the metastore when both neededColumns and partNames are
+        // non-empty. If either is empty, the call would return null anyway, and
+        // only after several retries, so we skip the metastore round trip.
+        if (neededColumns.size() > 0 && partNames.size() > 0) {
+          aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
+              neededColumns, partNames);
+        }
         if (null == aggrStats || null == aggrStats.getColStats()
             || aggrStats.getColStatsSize() == 0) {
           // There are some partitions with no state (or we didn't fetch any state).

http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/queries/clientpositive/stats_ppr_all.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_ppr_all.q b/ql/src/test/queries/clientpositive/stats_ppr_all.q
new file mode 100644
index 0000000..a5630cb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_ppr_all.q
@@ -0,0 +1,24 @@
+set hive.stats.fetch.column.stats=true;
+
+drop table ss;
+
+CREATE TABLE ss (
+    sales_order_id  BIGINT,
+    order_amount    FLOAT)
+PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc;
+
+insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1;
+insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1;
+insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1;
+
+ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns;
+
+explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1);
+
+explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0;
+
+explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0;
+
+explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0;
+
+explain select '1' from ss where (year*100+month*10+day) > "201511";
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f1939cb5/ql/src/test/results/clientpositive/stats_ppr_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
new file mode 100644
index 0000000..d00c91e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
@@ -0,0 +1,300 @@
+PREHOOK: query: drop table ss
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table ss
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE ss (
+    sales_order_id  BIGINT,
+    order_amount    FLOAT)
+PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ss
+POSTHOOK: query: CREATE TABLE ss (
+    sales_order_id  BIGINT,
+    order_amount    FLOAT)
+PARTITIONED BY (country STRING, year INT, month INT, day INT) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ss
+PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=1
+POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=1) select 1, 22.0 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=1
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=1).order_amount EXPRESSION []
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=1).sales_order_id EXPRESSION []
+PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ss@country=US/year=2015/month=2/day=1
+POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=2, day=1) select 2, 2.0 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ss@country=US/year=2015/month=2/day=1
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=2,day=1).order_amount EXPRESSION []
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=2,day=1).sales_order_id EXPRESSION []
+PREHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=2
+POSTHOOK: query: insert overwrite table ss partition(country="US", year=2015, month=1, day=2) select 1, 2.0 from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=2
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=2).order_amount EXPRESSION []
+POSTHOOK: Lineage: ss PARTITION(country=US,year=2015,month=1,day=2).sales_order_id EXPRESSION []
+PREHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ss
+PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=1
+PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=2
+PREHOOK: Input: default@ss@country=US/year=2015/month=2/day=1
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ss
+POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=1
+POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=2
+POSTHOOK: Input: default@ss@country=US/year=2015/month=2/day=1
+#### A masked pattern was here ####
+PREHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ss
+            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: order_amount (type: float)
+              outputColumnNames: order_amount
+              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: sum(order_amount)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(order_amount) from ss where (year*10000+month*100+day) = "2015010" and 1>0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ss
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+            Filter Operator
+              predicate: ((((year * 10000) + (month * 100)) + day) = '2015010') (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+              Select Operator
+                expressions: order_amount (type: float)
+                outputColumnNames: order_amount
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL
+                Group By Operator
+                  aggregations: sum(order_amount)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                    value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) = "201511" and 1>0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ss
+            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: order_amount (type: float)
+              outputColumnNames: order_amount
+              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: sum(order_amount)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(order_amount) from ss where (year*100+month*10+day) > "201511" and 1>0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ss
+            Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (((201500 + (month * 10)) + day) > '201511') (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+              Select Operator
+                expressions: order_amount (type: float)
+                outputColumnNames: order_amount
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(order_amount)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select '1' from ss where (year*100+month*10+day) > "201511"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select '1' from ss where (year*100+month*10+day) > "201511"
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: ss
+          Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          Filter Operator
+            predicate: (((201500 + (month * 10)) + day) > '201511') (type: boolean)
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+            Select Operator
+              expressions: '1' (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
+              ListSink
+