You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by re...@apache.org on 2017/01/31 13:44:36 UTC
hive git commit: HIVE-15723 Hive should report a warning about
missing table/column statistics to user (reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master f7bc76486 -> 1c2c4858a
HIVE-15723 Hive should report a warning about missing table/column statistics to user (reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1c2c4858
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1c2c4858
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1c2c4858
Branch: refs/heads/master
Commit: 1c2c4858a371eaf99417e6e31e58cb6491a0c588
Parents: f7bc764
Author: Remus Rusanu <re...@apache.org>
Authored: Tue Jan 31 05:44:20 2017 -0800
Committer: Remus Rusanu <re...@apache.org>
Committed: Tue Jan 31 05:44:20 2017 -0800
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../ql/optimizer/calcite/RelOptHiveTable.java | 7 +
.../hadoop/hive/ql/session/SessionState.java | 6 +
.../clientpositive/stats_missing_warning.q | 55 +++++++
.../clientpositive/stats_missing_warning.q.out | 159 +++++++++++++++++++
5 files changed, 229 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d19d2ea..a2ec1f0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1079,6 +1079,8 @@ public class HiveConf extends Configuration {
+ " expressed as multiple of Local FS write cost"),
HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
+ " expressed as multiple of Local FS read cost"),
+ HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", false,
+ "Toggle display of CBO warnings like missing column stats"),
AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
SEMIJOIN_CONVERSION("hive.enable.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"),
HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" +
http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 009d9e5..9faccd7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -56,6 +56,8 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -391,6 +393,11 @@ public class RelOptHiveTable extends RelOptAbstractTable {
noColsMissingStats.getAndAdd(colNamesFailedStats.size());
if (allowNullColumnForMissingStats) {
LOG.warn(logMsg);
+ HiveConf conf = SessionState.getSessionConf();
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) {
+ LogHelper console = SessionState.getConsole();
+ console.printInfoNoLog(logMsg);
+ }
} else {
LOG.error(logMsg);
throw new RuntimeException(logMsg);
http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 453e0a5..d607f61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -1099,6 +1099,12 @@ public class SessionState {
LOG.info(info + StringUtils.defaultString(detail));
}
+ public void printInfoNoLog(String info) {
+ if (!getIsSilent()) {
+ getInfoStream().println(info);
+ }
+ }
+
public void printError(String error) {
printError(error, null);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/test/queries/clientpositive/stats_missing_warning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_missing_warning.q b/ql/src/test/queries/clientpositive/stats_missing_warning.q
new file mode 100644
index 0000000..b6cf049
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_missing_warning.q
@@ -0,0 +1,55 @@
+set hive.stats.autogather=false;
+
+-- Explicitly DROP and re-CREATE (rather than CREATE IF NOT EXISTS) to ensure stats are not carried over
+DROP TABLE IF EXISTS missing_stats_t1;
+DROP TABLE IF EXISTS missing_stats_t2;
+DROP TABLE IF EXISTS missing_stats_t3;
+CREATE TABLE missing_stats_t1 (key STRING, value STRING);
+CREATE TABLE missing_stats_t2 (key STRING, value STRING);
+CREATE TABLE missing_stats_t3 (key STRING, value STRING);
+
+INSERT INTO missing_stats_t1 (key, value)
+ SELECT key, value
+ FROM src;
+
+INSERT INTO missing_stats_t2 (key, value)
+ SELECT key, value
+ FROM src;
+
+INSERT INTO missing_stats_t3 (key, value)
+ SELECT key, value
+ FROM src;
+
+-- Default is FALSE, so the warning display must be enabled explicitly
+set hive.cbo.show.warnings=true;
+
+set hive.cbo.enable=true;
+
+-- Should print warning
+set hive.cbo.show.warnings=true;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;
+
+-- Should not print warning
+set hive.cbo.show.warnings=false;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;
+
+ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS;
+ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS;
+
+
+-- Warning should be gone
+set hive.cbo.show.warnings=true;
+
+SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value;
http://git-wip-us.apache.org/repos/asf/hive/blob/1c2c4858/ql/src/test/results/clientpositive/stats_missing_warning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_missing_warning.q.out b/ql/src/test/results/clientpositive/stats_missing_warning.q.out
new file mode 100644
index 0000000..0ed70a0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_missing_warning.q.out
@@ -0,0 +1,159 @@
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE IF EXISTS missing_stats_t3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS missing_stats_t3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t1
+POSTHOOK: query: CREATE TABLE missing_stats_t1 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t1
+PREHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t2
+POSTHOOK: query: CREATE TABLE missing_stats_t2 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t2
+PREHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@missing_stats_t3
+POSTHOOK: query: CREATE TABLE missing_stats_t3 (key STRING, value STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@missing_stats_t3
+PREHOOK: query: INSERT INTO missing_stats_t1 (key, value)
+ SELECT key, value
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t1
+POSTHOOK: query: INSERT INTO missing_stats_t1 (key, value)
+ SELECT key, value
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t1
+POSTHOOK: Lineage: missing_stats_t1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT INTO missing_stats_t2 (key, value)
+ SELECT key, value
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t2
+POSTHOOK: query: INSERT INTO missing_stats_t2 (key, value)
+ SELECT key, value
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t2
+POSTHOOK: Lineage: missing_stats_t2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: INSERT INTO missing_stats_t3 (key, value)
+ SELECT key, value
+ FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@missing_stats_t3
+POSTHOOK: query: INSERT INTO missing_stats_t3 (key, value)
+ SELECT key, value
+ FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@missing_stats_t3
+POSTHOOK: Lineage: missing_stats_t3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: missing_stats_t3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0
+PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t2
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t2
+#### A masked pattern was here ####
+PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+PREHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@missing_stats_t1
+PREHOOK: Input: default@missing_stats_t2
+PREHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*)
+FROM missing_stats_t1 t1
+JOIN missing_stats_t2 t2 ON t1.value = t2.key
+JOIN missing_stats_t3 t3 ON t2.key = t3.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@missing_stats_t1
+POSTHOOK: Input: default@missing_stats_t2
+POSTHOOK: Input: default@missing_stats_t3
+#### A masked pattern was here ####
+0