You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2018/04/26 14:58:59 UTC
[35/50] [abbrv] hive git commit: HIVE-19247 : StatsOptimizer: Missing stats fast-path for Date (Gopal V via Ashutosh Chauhan)
HIVE-19247 : StatsOptimizer: Missing stats fast-path for Date (Gopal V via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/34ced306
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/34ced306
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/34ced306
Branch: refs/heads/storage-branch-2.6
Commit: 34ced3062f0b5083049cf1c94aa6d5335ee923c7
Parents: 63923e7
Author: Gopal V <go...@apache.org>
Authored: Tue Apr 24 21:51:22 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Apr 24 21:51:22 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 3 +-
.../hive/ql/optimizer/StatsOptimizer.java | 97 ++++++++++++++++++--
ql/src/test/queries/clientpositive/stats_date.q | 18 ++++
.../clientpositive/llap/stats_date.q.out | 80 ++++++++++++++++
4 files changed, 189 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f32b431..2c1a76d 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -877,7 +877,8 @@ minillaplocal.query.files=\
unionDistinct_3.q,\
vectorized_join46.q,\
vectorized_multi_output_select.q,\
- partialdhj.q
+ partialdhj.q,\
+ stats_date.q
encrypted.query.files=encryption_join_unencrypted_tbl.q,\
encryption_insert_partition_static.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index d26a48b..a574372 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.optimizer;
+import java.sql.Date;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
@@ -72,6 +74,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -146,11 +150,12 @@ public class StatsOptimizer extends Transform {
}
enum StatType{
- Integeral,
+ Integer,
Double,
String,
Boolean,
Binary,
+ Date,
Unsupported
}
@@ -163,7 +168,6 @@ public class StatsOptimizer extends Transform {
Object cast(long longValue) { return (short)longValue; } },
TINYINT { @Override
Object cast(long longValue) { return (byte)longValue; } };
-
abstract Object cast(long longValue);
}
@@ -175,6 +179,13 @@ public class StatsOptimizer extends Transform {
abstract Object cast(double doubleValue);
}
+
+ enum DateSubType {
+ DAYS {@Override
+ Object cast(long longValue) { return (new DateWritable((int)longValue)).get();}
+ };
+ abstract Object cast(long longValue);
+ }
enum GbyKeyType {
NULL, CONSTANT, OTHER
@@ -182,7 +193,7 @@ public class StatsOptimizer extends Transform {
private StatType getType(String origType) {
if (serdeConstants.IntegralTypes.contains(origType)) {
- return StatType.Integeral;
+ return StatType.Integer;
} else if (origType.equals(serdeConstants.DOUBLE_TYPE_NAME) ||
origType.equals(serdeConstants.FLOAT_TYPE_NAME)) {
return StatType.Double;
@@ -192,6 +203,8 @@ public class StatsOptimizer extends Transform {
return StatType.Boolean;
} else if (origType.equals(serdeConstants.STRING_TYPE_NAME)) {
return StatType.String;
+ } else if (origType.equals(serdeConstants.DATE_TYPE_NAME)) {
+ return StatType.Date;
}
return StatType.Unsupported;
}
@@ -199,7 +212,7 @@ public class StatsOptimizer extends Transform {
private Long getNullcountFor(StatType type, ColumnStatisticsData statData) {
switch(type) {
- case Integeral :
+ case Integer :
return statData.getLongStats().getNumNulls();
case Double:
return statData.getDoubleStats().getNumNulls();
@@ -209,6 +222,8 @@ public class StatsOptimizer extends Transform {
return statData.getBooleanStats().getNumNulls();
case Binary:
return statData.getBinaryStats().getNumNulls();
+ case Date:
+ return statData.getDateStats().getNumNulls();
default:
return null;
}
@@ -515,7 +530,7 @@ public class StatsOptimizer extends Transform {
ColumnStatisticsData statData = stats.get(0).getStatsData();
String name = colDesc.getTypeString().toUpperCase();
switch (type) {
- case Integeral: {
+ case Integer: {
LongSubType subType = LongSubType.valueOf(name);
LongColumnStatsData lstats = statData.getLongStats();
if (lstats.isSetHighValue()) {
@@ -535,6 +550,15 @@ public class StatsOptimizer extends Transform {
}
break;
}
+ case Date: {
+ DateColumnStatsData dstats = statData.getDateStats();
+ if (dstats.isSetHighValue()) {
+ oneRow.add(DateSubType.DAYS.cast(dstats.getHighValue().getDaysSinceEpoch()));
+ } else {
+ oneRow.add(null);
+ }
+ break;
+ }
default:
// unsupported type
Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
@@ -546,7 +570,7 @@ public class StatsOptimizer extends Transform {
tsOp.getConf().getAlias(), tsOp).getPartitions();
String name = colDesc.getTypeString().toUpperCase();
switch (type) {
- case Integeral: {
+ case Integer: {
LongSubType subType = LongSubType.valueOf(name);
Long maxVal = null;
@@ -598,6 +622,30 @@ public class StatsOptimizer extends Transform {
}
break;
}
+ case Date: {
+ Long maxVal = null;
+ Collection<List<ColumnStatisticsObj>> result =
+ verifyAndGetPartColumnStats(hive, tbl, colName, parts);
+ if (result == null) {
+ return null; // logging inside
+ }
+ for (List<ColumnStatisticsObj> statObj : result) {
+ ColumnStatisticsData statData = validateSingleColStat(statObj);
+ if (statData == null) return null;
+ DateColumnStatsData dstats = statData.getDateStats();
+ if (!dstats.isSetHighValue()) {
+ continue;
+ }
+ long curVal = dstats.getHighValue().getDaysSinceEpoch();
+ maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal);
+ }
+ if (maxVal != null) {
+ oneRow.add(DateSubType.DAYS.cast(maxVal));
+ } else {
+ oneRow.add(null);
+ }
+ break;
+ }
default:
Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
@@ -619,7 +667,7 @@ public class StatsOptimizer extends Transform {
.get(0).getStatsData();
String name = colDesc.getTypeString().toUpperCase();
switch (type) {
- case Integeral: {
+ case Integer: {
LongSubType subType = LongSubType.valueOf(name);
LongColumnStatsData lstats = statData.getLongStats();
if (lstats.isSetLowValue()) {
@@ -639,6 +687,15 @@ public class StatsOptimizer extends Transform {
}
break;
}
+ case Date: {
+ DateColumnStatsData dstats = statData.getDateStats();
+ if (dstats.isSetLowValue()) {
+ oneRow.add(DateSubType.DAYS.cast(dstats.getLowValue().getDaysSinceEpoch()));
+ } else {
+ oneRow.add(null);
+ }
+ break;
+ }
default: // unsupported type
Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
@@ -648,7 +705,7 @@ public class StatsOptimizer extends Transform {
Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
String name = colDesc.getTypeString().toUpperCase();
switch(type) {
- case Integeral: {
+ case Integer: {
LongSubType subType = LongSubType.valueOf(name);
Long minVal = null;
@@ -700,6 +757,30 @@ public class StatsOptimizer extends Transform {
}
break;
}
+ case Date: {
+ Long minVal = null;
+ Collection<List<ColumnStatisticsObj>> result =
+ verifyAndGetPartColumnStats(hive, tbl, colName, parts);
+ if (result == null) {
+ return null; // logging inside
+ }
+ for (List<ColumnStatisticsObj> statObj : result) {
+ ColumnStatisticsData statData = validateSingleColStat(statObj);
+ if (statData == null) return null;
+ DateColumnStatsData dstats = statData.getDateStats();
+ if (!dstats.isSetLowValue()) {
+ continue;
+ }
+ long curVal = dstats.getLowValue().getDaysSinceEpoch();
+ minVal = minVal == null ? curVal : Math.min(minVal, curVal);
+ }
+ if (minVal != null) {
+ oneRow.add(DateSubType.DAYS.cast(minVal));
+ } else {
+ oneRow.add(null);
+ }
+ break;
+ }
default: // unsupported type
Logger.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
"metadata optimizer for column : " + colName);
http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/test/queries/clientpositive/stats_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_date.q b/ql/src/test/queries/clientpositive/stats_date.q
new file mode 100644
index 0000000..da1ef58
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_date.q
@@ -0,0 +1,18 @@
+
+create table foo(x date, y timestamp) stored as orc;
+
+insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59');
+
+analyze table foo compute statistics for columns;
+
+set hive.compute.query.using.stats=true;
+
+set test.comment=All queries need to be just metadata fetch tasks
+
+explain select min(x) from foo;
+explain select max(x) from foo;
+explain select count(x) from foo;
+
+explain select count(x), max(x), min(x) from foo;
+
+select count(x), max(x), min(x) from foo;
http://git-wip-us.apache.org/repos/asf/hive/blob/34ced306/ql/src/test/results/clientpositive/llap/stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/stats_date.q.out b/ql/src/test/results/clientpositive/llap/stats_date.q.out
new file mode 100644
index 0000000..3ccf400
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/stats_date.q.out
@@ -0,0 +1,80 @@
+PREHOOK: query: create table foo(x date, y timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@foo
+POSTHOOK: query: create table foo(x date, y timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@foo
+PREHOOK: query: insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@foo
+POSTHOOK: query: insert into foo values('1999-01-01', '1999-01-01 00:00:01'), ('2018-01-01', '2018-01-01 23:23:59')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@foo
+POSTHOOK: Lineage: foo.x SCRIPT []
+POSTHOOK: Lineage: foo.y SCRIPT []
+PREHOOK: query: analyze table foo compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+PREHOOK: Output: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table foo compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+POSTHOOK: Output: default@foo
+#### A masked pattern was here ####
+Warning: Value had a \n character in it.
+PREHOOK: query: explain select max(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select max(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(x), max(x), min(x) from foo
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(x), max(x), min(x) from foo
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(x), max(x), min(x) from foo
+PREHOOK: type: QUERY
+PREHOOK: Input: default@foo
+#### A masked pattern was here ####
+POSTHOOK: query: select count(x), max(x), min(x) from foo
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@foo
+#### A masked pattern was here ####
+2 2018-01-01 1999-01-01