You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2018/04/26 14:58:59 UTC

[35/50] [abbrv] hive git commit: HIVE-19247 : StatsOptimizer: Missing stats fast-path for Date (Gopal V via Ashutosh Chauhan)

HIVE-19247 : StatsOptimizer: Missing stats fast-path for Date (Gopal V via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/34ced306
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/34ced306
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/34ced306

Branch: refs/heads/storage-branch-2.6
Commit: 34ced3062f0b5083049cf1c94aa6d5335ee923c7
Parents: 63923e7
Author: Gopal V <go...@apache.org>
Authored: Tue Apr 24 21:51:22 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Apr 24 21:51:22 2018 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  3 +-
 .../hive/ql/optimizer/StatsOptimizer.java       | 97 ++++++++++++++++++--
 ql/src/test/queries/clientpositive/stats_date.q | 18 ++++
 .../clientpositive/llap/stats_date.q.out        | 80 ++++++++++++++++
 4 files changed, 189 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f32b431..2c1a76d 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -877,7 +877,8 @@ minillaplocal.query.files=\
   unionDistinct_3.q,\
   vectorized_join46.q,\
   vectorized_multi_output_select.q,\
-  partialdhj.q
+  partialdhj.q,\
+  stats_date.q
 
 encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_insert_partition_static.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index d26a48b..a574372 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer;
 
+import java.sql.Date;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
@@ -72,6 +74,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -146,11 +150,12 @@ public class StatsOptimizer extends Transform {
     }
 
     enum StatType{
-      Integeral,
+      Integer,
       Double,
       String,
       Boolean,
       Binary,
+      Date,
       Unsupported
     }
 
@@ -163,7 +168,6 @@ public class StatsOptimizer extends Transform {
       Object cast(long longValue) { return (short)longValue; } },
       TINYINT { @Override
       Object cast(long longValue) { return (byte)longValue; } };
-
       abstract Object cast(long longValue);
     }
 
@@ -175,6 +179,13 @@ public class StatsOptimizer extends Transform {
 
       abstract Object cast(double doubleValue);
     }
+    
+    enum DateSubType { // conversions from a raw date statistic (a long) to the object added to the result row
+      DAYS {@Override // input is a day count; callers in this class pass getDaysSinceEpoch()
+        Object cast(long longValue) { return (new DateWritable((int)longValue)).get();} // narrowed to int (out-of-range values would be truncated), wrapped in DateWritable, unwrapped via get()
+      };
+      abstract Object cast(long longValue); // each constant defines how the long statistic becomes the emitted value
+    }
 
     enum GbyKeyType {
       NULL, CONSTANT, OTHER
@@ -182,7 +193,7 @@ public class StatsOptimizer extends Transform {
 
     private StatType getType(String origType) {
       if (serdeConstants.IntegralTypes.contains(origType)) {
-        return StatType.Integeral;
+        return StatType.Integer;
       } else if (origType.equals(serdeConstants.DOUBLE_TYPE_NAME) ||
           origType.equals(serdeConstants.FLOAT_TYPE_NAME)) {
         return StatType.Double;
@@ -192,6 +203,8 @@ public class StatsOptimizer extends Transform {
         return StatType.Boolean;
       } else if (origType.equals(serdeConstants.STRING_TYPE_NAME)) {
         return StatType.String;
+      } else if (origType.equals(serdeConstants.DATE_TYPE_NAME)) {
+        return StatType.Date;
       }
       return StatType.Unsupported;
     }
@@ -199,7 +212,7 @@ public class StatsOptimizer extends Transform {
     private Long getNullcountFor(StatType type, ColumnStatisticsData statData) {
 
       switch(type) {
-      case Integeral :
+      case Integer :
         return statData.getLongStats().getNumNulls();
       case Double:
         return statData.getDoubleStats().getNumNulls();
@@ -209,6 +222,8 @@ public class StatsOptimizer extends Transform {
         return statData.getBooleanStats().getNumNulls();
       case Binary:
         return statData.getBinaryStats().getNumNulls();
+      case Date:
+        return statData.getDateStats().getNumNulls();
       default:
         return null;
       }
@@ -515,7 +530,7 @@ public class StatsOptimizer extends Transform {
               ColumnStatisticsData statData = stats.get(0).getStatsData();
               String name = colDesc.getTypeString().toUpperCase();
               switch (type) {
-                case Integeral: {
+                case Integer: {
                   LongSubType subType = LongSubType.valueOf(name);
                   LongColumnStatsData lstats = statData.getLongStats();
                   if (lstats.isSetHighValue()) {
@@ -535,6 +550,15 @@ public class StatsOptimizer extends Transform {
                   }
                   break;
                 }
+                case Date: {
+                  DateColumnStatsData dstats = statData.getDateStats();
+                  if (dstats.isSetHighValue()) {
+                    oneRow.add(DateSubType.DAYS.cast(dstats.getHighValue().getDaysSinceEpoch())); 
+                  } else {
+                    oneRow.add(null);
+                  }
+                  break;
+                }
                 default:
                   // unsupported type
                   Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
@@ -546,7 +570,7 @@ public class StatsOptimizer extends Transform {
                   tsOp.getConf().getAlias(), tsOp).getPartitions();
               String name = colDesc.getTypeString().toUpperCase();
               switch (type) {
-                case Integeral: {
+                case Integer: {
                   LongSubType subType = LongSubType.valueOf(name);
 
                   Long maxVal = null;
@@ -598,6 +622,30 @@ public class StatsOptimizer extends Transform {
                   }
                   break;
                 }
+                case Date: {
+                  Long maxVal = null;
+                  Collection<List<ColumnStatisticsObj>> result =
+                      verifyAndGetPartColumnStats(hive, tbl, colName, parts);
+                  if (result == null) {
+                    return null; // logging inside
+                  }
+                  for (List<ColumnStatisticsObj> statObj : result) {
+                    ColumnStatisticsData statData = validateSingleColStat(statObj);
+                    if (statData == null) return null;
+                    DateColumnStatsData dstats = statData.getDateStats();
+                    if (!dstats.isSetHighValue()) {
+                      continue;
+                    }
+                    long curVal = dstats.getHighValue().getDaysSinceEpoch();
+                    maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
+                  }
+                  if (maxVal != null) {
+                    oneRow.add(DateSubType.DAYS.cast(maxVal));
+                  } else {
+                    oneRow.add(null);
+                  }
+                  break;
+                }
                 default:
                   Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                       "metadata optimizer for column : " + colName);
@@ -619,7 +667,7 @@ public class StatsOptimizer extends Transform {
                   .get(0).getStatsData();
               String name = colDesc.getTypeString().toUpperCase();
               switch (type) {
-                case Integeral: {
+                case Integer: {
                   LongSubType subType = LongSubType.valueOf(name);
                   LongColumnStatsData lstats = statData.getLongStats();
                   if (lstats.isSetLowValue()) {
@@ -639,6 +687,15 @@ public class StatsOptimizer extends Transform {
                   }
                   break;
                 }
+                case Date: {
+                  DateColumnStatsData dstats = statData.getDateStats();
+                  if (dstats.isSetLowValue()) {
+                    oneRow.add(DateSubType.DAYS.cast(dstats.getLowValue().getDaysSinceEpoch())); 
+                  } else {
+                    oneRow.add(null);
+                  }
+                  break;
+                }
                 default: // unsupported type
                   Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                       "metadata optimizer for column : " + colName);
@@ -648,7 +705,7 @@ public class StatsOptimizer extends Transform {
               Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
               String name = colDesc.getTypeString().toUpperCase();
               switch(type) {
-                case Integeral: {
+                case Integer: {
                   LongSubType subType = LongSubType.valueOf(name);
 
                   Long minVal = null;
@@ -700,6 +757,30 @@ public class StatsOptimizer extends Transform {
                   }
                   break;
                 }
+                case Date: {
+                  Long minVal = null;
+                  Collection<List<ColumnStatisticsObj>> result =
+                      verifyAndGetPartColumnStats(hive, tbl, colName, parts);
+                  if (result == null) {
+                    return null; // logging inside
+                  }
+                  for (List<ColumnStatisticsObj> statObj : result) {
+                    ColumnStatisticsData statData = validateSingleColStat(statObj);
+                    if (statData == null) return null;
+                    DateColumnStatsData dstats = statData.getDateStats();
+                    if (!dstats.isSetLowValue()) {
+                      continue;
+                    }
+                    long curVal = dstats.getLowValue().getDaysSinceEpoch();
+                    minVal = minVal == null ? curVal : Math.min(minVal, curVal);
+                  }
+                  if (minVal != null) {
+                    oneRow.add(DateSubType.DAYS.cast(minVal));
+                  } else {
+                    oneRow.add(null);
+                  }
+                  break;
+                }
                 default: // unsupported type
                   Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
                       "metadata optimizer for column : " + colName);

http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/test/queries/clientpositive/stats_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_date.q b/ql/src/test/queries/clientpositive/stats_date.q
new file mode 100644
index 0000000..da1ef58
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_date.q
@@ -0,0 +1,18 @@
+
+create table foo(x date, y timestamp) stored as orc;
+
+insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59');
+
+analyze table foo compute statistics for columns;
+
+set hive.compute.query.using.stats=true;
+
+set test.comment=All queries need to be just metadata fetch tasks
+
+explain select min(x) from foo; 
+explain select max(x) from foo; 
+explain select count(x) from foo; 
+
+explain select count(x), max(x), min(x) from foo; 
+
+select count(x), max(x), min(x) from foo; 

http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/test/results/clientpositive/llap/stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/stats_date.q.out b/ql/src/test/results/clientpositive/llap/stats_date.q.out
new file mode 100644
index 0000000..3ccf400
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/stats_date.q.out
@@ -0,0 +1,80 @@
+PREHOOK: query: create table foo(x date, y timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo(x date, y timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.x SCRIPT []
+POSTHOOK: Lineage: foo.y SCRIPT []
+PREHOOK: query: analyze table foo compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table foo compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
+#### A masked pattern was here ####
+Warning: Value had a \n character in it.
+PREHOOK: query: explain select max(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(x), max(x), min(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(x), max(x), min(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(x), max(x), min(x) from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select count(x), max(x), min(x) from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+2	2018-01-01	1999-01-01