You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by re...@apache.org on 2017/01/31 13:44:36 UTC

hive git commit: HIVE-15723 Hive should report a warning about missing table/column statistics to user (reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master f7bc76486 -> 1c2c4858a


HIVE-15723 Hive should report a warning about missing table/column statistics to user (reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1c2c4858
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1c2c4858
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1c2c4858

Branch: refs/heads/master
Commit: 1c2c4858a371eaf99417e6e31e58cb6491a0c588
Parents: f7bc764
Author: Remus Rusanu <re...@apache.org>
Authored: Tue Jan 31 05:44:20 2017 -0800
Committer: Remus Rusanu <re...@apache.org>
Committed: Tue Jan 31 05:44:20 2017 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +
 .../ql/optimizer/calcite/RelOptHiveTable.java   |   7 +
 .../hadoop/hive/ql/session/SessionState.java    |   6 +
 .../clientpositive/stats_missing_warning.q      |  55 +++++++
 .../clientpositive/stats_missing_warning.q.out  | 159 +++++++++++++++++++
 5 files changed, 229 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d19d2ea..a2ec1f0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1079,6 +1079,8 @@ public class HiveConf extends Configuration {
                                                                  + " expressed as multiple of Local FS write cost"),
     HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
                                                                  + " expressed as multiple of Local FS read cost"),
+    HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", false,
+         "Toggle display of CBO warnings like missing column stats"),
     AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
     SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"),
     HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" +

http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 009d9e5..9faccd7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -56,6 +56,8 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -391,6 +393,11 @@ public class RelOptHiveTable extends RelOptAbstractTable {
       noColsMissingStats.getAndAdd(colNamesFailedStats.size());
       if (allowNullColumnForMissingStats) {
         LOG.warn(logMsg);
+        HiveConf conf = SessionState.getSessionConf();
+        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
+          LogHelper console = SessionState.getConsole();
+          console.printInfoNoLog(logMsg);
+        }
       } else {
         LOG.error(logMsg);
         throw new RuntimeException(logMsg);

http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 453e0a5..d607f61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -1099,6 +1099,12 @@ public class SessionState {
       LOG.info(info + StringUtils.defaultString(detail));
     }
 
+    public void printInfoNoLog(String info) {
+      if (!getIsSilent()) {
+        getInfoStream().println(info);
+      }
+    }
+
     public void printError(String error) {
       printError(error, null);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/test/queries/clientpositive/stats_missing_warning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_missing_warning.q b/ql/src/test/queries/clientpositive/stats_missing_warning.q
new file mode 100644
index 0000000..b6cf049
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_missing_warning.q
@@ -0,0 +1,55 @@
+set hive.stats.autogather=false;
+
+-- Explicitly DROP vs. CREATE IF NOT EXISTS to ensure stats are not carried over
+DROP TABLE IF EXISTS missing_stats_t1;
+DROP TABLE IF EXISTS missing_stats_t2;
+DROP TABLE IF EXISTS missing_stats_t3;
+CREATE TABLE missing_stats_t1 (key STRING, value STRING);
+CREATE TABLE missing_stats_t2 (key STRING, value STRING);
+CREATE TABLE missing_stats_t3 (key STRING, value STRING);
+
+INSERT INTO missing_stats_t1 (key, value)
+   SELECT key, value
+   FROM src;
+
+INSERT INTO missing_stats_t2 (key, value)
+   SELECT key, value
+   FROM src;
+
+INSERT INTO missing_stats_t3 (key, value)
+   SELECT key, value
+   FROM src;
+ 
+-- Default is FALSE, so the warnings must be enabled explicitly
+set hive.cbo.show.warnings=true;
+
+set hive.cbo.enable=true;
+
+-- Should print warning
+set hive.cbo.show.warnings=true;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;
+
+-- Should not print warning
+set hive.cbo.show.warnings=false;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;
+
+ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS;
+
+
+-- Warning should be gone
+set hive.cbo.show.warnings=true;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;

http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/test/results/clientpositive/stats_missing_warning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_missing_warning.q.out b/ql/src/test/results/clientpositive/stats_missing_warning.q.out
new file mode 100644
index 0000000..0ed70a0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_missing_warning.q.out
@@ -0,0 +1,159 @@
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t1
+POSTHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t1
+PREHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t2
+POSTHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t2
+PREHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t3
+POSTHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t3
+PREHOOK: query: INSERT INTO missing_stats_t1 (key, value)
+   SELECT key, value
+   FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t1
+POSTHOOK: query: INSERT INTO missing_stats_t1 (key, value)
+   SELECT key, value
+   FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t1
+POSTHOOK: Lineage: missing_stats_t1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT INTO missing_stats_t2 (key, value)
+   SELECT key, value
+   FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t2
+POSTHOOK: query: INSERT INTO missing_stats_t2 (key, value)
+   SELECT key, value
+   FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t2
+POSTHOOK: Lineage: missing_stats_t2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT INTO missing_stats_t3 (key, value)
+   SELECT key, value
+   FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t3
+POSTHOOK: query: INSERT INTO missing_stats_t3 (key, value)
+   SELECT key, value
+   FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t3
+POSTHOOK: Lineage: missing_stats_t3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0
+PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t2
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0