You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/02/21 23:45:35 UTC
hive git commit: HIVE-18573 : Use proper Calcite operators instead of
UDFs (Slim Bouguerra via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master ad87176c7 -> dcb3817d6
HIVE-18573 : Use proper Calcite operators instead of UDFs (Slim Bouguerra via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcb3817d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcb3817d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcb3817d
Branch: refs/heads/master
Commit: dcb3817d6d1360b816e8687bbae8d7aa62dc2b20
Parents: ad87176
Author: Slim Bouguerra <sl...@gmail.com>
Authored: Mon Jan 29 13:49:00 2018 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Feb 21 15:44:46 2018 -0800
----------------------------------------------------------------------
.../calcite/reloperators/HiveConcat.java | 35 +++++
.../calcite/reloperators/HiveExtractDate.java | 3 +-
.../translator/SqlFunctionConverter.java | 41 +++++
.../llap/bucket_map_join_tez_empty.q.out | 4 +-
.../clientpositive/llap/subquery_in.q.out | 4 +-
.../clientpositive/llap/subquery_notin.q.out | 2 +-
.../clientpositive/llap/subquery_scalar.q.out | 149 ++++++++-----------
.../clientpositive/spark/subquery_in.q.out | 4 +-
.../clientpositive/spark/subquery_notin.q.out | 2 +-
9 files changed, 148 insertions(+), 96 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
new file mode 100644
index 0000000..36c34cc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+
+public class HiveConcat extends SqlSpecialOperator {
+ public static final SqlSpecialOperator INSTANCE = new HiveConcat();
+
+ private HiveConcat() {
+ super("||", SqlKind.OTHER_FUNCTION, 30, true, ReturnTypes.VARCHAR_2000,
+ InferTypes.RETURN_TYPE, null
+ );
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
index 4099733..a43f406 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
@@ -22,6 +22,7 @@ import java.util.Set;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlExtractFunction;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeTransforms;
@@ -43,7 +44,7 @@ public class HiveExtractDate extends SqlFunction {
Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND);
private HiveExtractDate(String name) {
- super(name, SqlKind.EXTRACT,
+ super(name, SqlKind.EXTRACT,
ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null,
OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
SqlFunctionCategory.SYSTEM);
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 3f2eaef..cb0c2b1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunc
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
@@ -235,6 +236,8 @@ public class SqlFunctionConverter {
case CASE:
case EXTRACT:
case FLOOR:
+ case CEIL:
+ case LIKE:
case OTHER_FUNCTION:
node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
@@ -398,6 +401,44 @@ public class SqlFunctionConverter {
hToken(HiveParser.Identifier, "floor_minute"));
registerFunction("floor_second", HiveFloorDate.SECOND,
hToken(HiveParser.Identifier, "floor_second"));
+ registerFunction("power", SqlStdOperatorTable.POWER, hToken(HiveParser.Identifier, "power"));
+ registerDuplicateFunction("pow", SqlStdOperatorTable.POWER,
+ hToken(HiveParser.Identifier, "power")
+ );
+ registerFunction("ceil", SqlStdOperatorTable.CEIL, hToken(HiveParser.Identifier, "ceil"));
+ registerDuplicateFunction("ceiling", SqlStdOperatorTable.CEIL,
+ hToken(HiveParser.Identifier, "ceil")
+ );
+ registerFunction("floor", SqlStdOperatorTable.FLOOR, hToken(HiveParser.Identifier, "floor"));
+ registerFunction("log10", SqlStdOperatorTable.LOG10, hToken(HiveParser.Identifier, "log10"));
+ registerFunction("ln", SqlStdOperatorTable.LN, hToken(HiveParser.Identifier, "ln"));
+ registerFunction("cos", SqlStdOperatorTable.COS, hToken(HiveParser.Identifier, "cos"));
+ registerFunction("sin", SqlStdOperatorTable.SIN, hToken(HiveParser.Identifier, "sin"));
+ registerFunction("tan", SqlStdOperatorTable.TAN, hToken(HiveParser.Identifier, "tan"));
+ registerFunction("concat", HiveConcat.INSTANCE,
+ hToken(HiveParser.Identifier, "concat")
+ );
+ registerFunction("substring", SqlStdOperatorTable.SUBSTRING,
+ hToken(HiveParser.Identifier, "substring")
+ );
+ registerFunction("like", SqlStdOperatorTable.LIKE, hToken(HiveParser.Identifier, "like"));
+ registerFunction("exp", SqlStdOperatorTable.EXP, hToken(HiveParser.Identifier, "exp"));
+ registerFunction("div", SqlStdOperatorTable.DIVIDE_INTEGER,
+ hToken(HiveParser.DIV, "div")
+ );
+ registerFunction("sqrt", SqlStdOperatorTable.SQRT, hToken(HiveParser.Identifier, "sqrt"));
+ registerFunction("lower", SqlStdOperatorTable.LOWER, hToken(HiveParser.Identifier, "lower"));
+ registerFunction("upper", SqlStdOperatorTable.UPPER, hToken(HiveParser.Identifier, "upper"));
+ registerFunction("abs", SqlStdOperatorTable.ABS, hToken(HiveParser.Identifier, "abs"));
+ registerFunction("char_length", SqlStdOperatorTable.CHAR_LENGTH,
+ hToken(HiveParser.Identifier, "char_length")
+ );
+ registerDuplicateFunction("character_length", SqlStdOperatorTable.CHAR_LENGTH,
+ hToken(HiveParser.Identifier, "char_length")
+ );
+ registerFunction("length", SqlStdOperatorTable.CHARACTER_LENGTH,
+ hToken(HiveParser.Identifier, "length")
+ );
}
private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
index 33825da..08df574 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
@@ -71,10 +71,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 2
- Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index d1ee21b..b5f9641 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -1920,7 +1920,7 @@ STAGE PLANS:
alias: part
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: floor(p_retailprice) is not null (type: boolean)
+ predicate: p_retailprice is not null (type: boolean)
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
@@ -1989,7 +1989,7 @@ STAGE PLANS:
outputColumnNames: _col1
Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: floor(_col1) is not null (type: boolean)
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: floor(_col1) (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index e894a44..50c18c8 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -2509,7 +2509,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 26 Data size: 16538 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: ((_col12 is null and floor(_col7) is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
+ predicate: ((_col12 is null and _col7 is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
Statistics: Num rows: 26 Data size: 16538 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 15535f5..cec3daa 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -3161,26 +3161,24 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col2 (type: int)
1 _col2 (type: int)
outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 26 Data size: 6634 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean)
- Statistics: Num rows: 13 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ residual filter predicates: {(_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)}
+ Statistics: Num rows: 8 Data size: 2504 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 4
Execution mode: llap
Reduce Operator Tree:
@@ -3258,8 +3256,7 @@ POSTHOOK: Input: default@part
85768
86428
90681
-Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
@@ -3273,12 +3270,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 8 (XPROD_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE)
- Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Map 7 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 7 (XPROD_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -3296,7 +3292,7 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 5
+ Map 4
Map Operator Tree:
TableScan
alias: part_null
@@ -3316,7 +3312,7 @@ STAGE PLANS:
value expressions: _col0 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: part
@@ -3351,41 +3347,22 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Outer Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (not (_col1 like _col9)) (type: boolean)
- Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
Inner Join 0 to 1
+ Inner Join 0 to 2
keys:
0
1
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11
- Statistics: Num rows: 1 Data size: 1506 Basic stats: COMPLETE Column stats: NONE
+ 2
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ residual filter predicates: {(not (_col1 like _col9))}
+ Statistics: Num rows: 1 Data size: 1505 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 1 Data size: 1506 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 1505 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint)
- Reducer 4
+ Reducer 3
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
@@ -3410,7 +3387,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3422,7 +3399,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
- Reducer 8
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3434,7 +3411,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
- Reducer 9
+ Reducer 8
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3459,8 +3436,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -3496,7 +3472,7 @@ POSTHOOK: Input: default@part_null
85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
-Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type)
PREHOOK: type: QUERY
POSTHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type)
@@ -3631,26 +3607,24 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col4 (type: string)
1 _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14
- Statistics: Num rows: 7 Data size: 982 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END)) (type: boolean)
+ residual filter predicates: {(not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END))}
+ Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ File Output Operator
+ compressed: false
Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 6
Execution mode: llap
Reduce Operator Tree:
@@ -3707,7 +3681,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -4339,7 +4313,7 @@ STAGE PLANS:
alias: part
Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (p_name is not null and p_type is not null) (type: boolean)
+ predicate: p_type is not null (type: boolean)
Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: p_name (type: string), p_brand (type: string), p_type (type: string)
@@ -4385,10 +4359,10 @@ STAGE PLANS:
0 _col1 (type: string), _col4 (type: string)
1 _col0 (type: string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4398,29 +4372,30 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Left Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col2 (type: string)
1 _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 26 Data size: 11062 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean)
- Statistics: Num rows: 13 Data size: 5625 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE
+ residual filter predicates: {(not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END))}
+ Statistics: Num rows: 7 Data size: 3535 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 is not null and _col1 is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: string), _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 6
Execution mode: llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index f89c146..5e48a5c 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -1827,7 +1827,7 @@ STAGE PLANS:
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: floor(p_retailprice) is not null (type: boolean)
+ predicate: p_retailprice is not null (type: boolean)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
@@ -1890,7 +1890,7 @@ STAGE PLANS:
outputColumnNames: _col1
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: floor(_col1) is not null (type: boolean)
+ predicate: _col1 is not null (type: boolean)
Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: floor(_col1) (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
index b2a1972..e2f26a9 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
@@ -2496,7 +2496,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12
Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((_col12 is null and floor(_col7) is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
+ predicate: ((_col12 is null and _col7 is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)