You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/11/03 05:18:16 UTC
[hive] branch master updated: HIVE-22274: Upgrade Calcite version
to 1.21.0 (Steve Carlin, reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 2a7bbba HIVE-22274: Upgrade Calcite version to 1.21.0 (Steve Carlin, reviewed by Jesus Camacho Rodriguez)
2a7bbba is described below
commit 2a7bbba8ca9d9f28b8a5a2af7ce5bbb8348614aa
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Sat Nov 2 22:16:39 2019 -0700
HIVE-22274: Upgrade Calcite version to 1.21.0 (Steve Carlin, reviewed by Jesus Camacho Rodriguez)
Closes apache/hive#809
---
.../positive/accumulo_predicate_pushdown.q.out | 27 +--
.../results/positive/hbase_ppd_key_range.q.out | 27 +--
.../src/test/results/positive/hbase_pushdown.q.out | 27 +--
.../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 1 -
.../apache/hive/jdbc/TestNewGetSplitsFormat.java | 6 +-
pom.xml | 2 +-
.../calcite/HiveDefaultRelMetadataProvider.java | 3 +
.../ql/optimizer/calcite/HiveRelFactories.java | 25 +--
.../hive/ql/optimizer/calcite/HiveRelOptUtil.java | 4 +-
.../calcite/HiveSubQRemoveRelBuilder.java | 6 +-
.../calcite/reloperators/HiveAggregate.java | 9 +-
.../calcite/reloperators/HiveMultiJoin.java | 2 +-
.../calcite/reloperators/HiveSemiJoin.java | 23 +--
.../rules/HiveAggregateJoinTransposeRule.java | 2 +-
.../calcite/rules/HiveJoinAddNotNullRule.java | 4 +-
.../calcite/rules/HiveJoinConstraintsRule.java | 5 +-
.../rules/HiveProjectJoinTransposeRule.java | 3 +-
.../calcite/rules/HiveRelDecorrelator.java | 36 ++--
.../calcite/rules/HiveRemoveGBYSemiJoinRule.java | 5 +-
.../calcite/rules/jdbc/JDBCSortPushDownRule.java | 27 ++-
.../calcite/stats/HiveRelMdDistinctRowCount.java | 25 +--
.../calcite/stats/HiveRelMdMaxRowCount.java | 83 +++++++++
.../calcite/stats/HiveRelMdPredicates.java | 17 +-
.../optimizer/calcite/stats/HiveRelMdRowCount.java | 17 +-
.../calcite/stats/HiveRelMdSelectivity.java | 5 +-
.../ql/optimizer/calcite/stats/HiveRelMdSize.java | 5 +-
.../optimizer/calcite/translator/ASTBuilder.java | 12 +-
.../optimizer/calcite/translator/ASTConverter.java | 7 +-
.../calcite/translator/HiveOpConverter.java | 12 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../results/clientpositive/acid_nullscan.q.out | 41 ++---
.../clientpositive/annotate_stats_filter.q.out | 162 ++----------------
.../clientpositive/annotate_stats_part.q.out | 14 +-
.../results/clientpositive/beeline/mapjoin2.q.out | 76 +++++----
.../clientpositive/cbo_rp_simple_select.q.out | 14 +-
.../results/clientpositive/cbo_simple_select.q.out | 14 +-
ql/src/test/results/clientpositive/concat_op.q.out | 6 +-
.../test/results/clientpositive/constprog3.q.out | 33 ++--
.../druid/druidmini_expressions.q.out | 16 +-
.../druid/druidmini_extractTime.q.out | 8 +-
.../clientpositive/druid/druidmini_floorTime.q.out | 6 +-
.../test/results/clientpositive/filter_union.q.out | 36 ++--
ql/src/test/results/clientpositive/fold_case.q.out | 49 ++----
.../clientpositive/fold_eq_with_case_when.q.out | 27 +--
ql/src/test/results/clientpositive/fold_when.q.out | 108 +-----------
.../results/clientpositive/infer_const_type.q.out | 27 +--
.../results/clientpositive/infer_join_preds.q.out | 95 ++++++-----
ql/src/test/results/clientpositive/input9.q.out | 56 +++---
.../clientpositive/llap/bucketpruning1.q.out | 50 +-----
.../clientpositive/llap/cbo_simple_select.q.out | 11 +-
.../clientpositive/llap/constprog_semijoin.q.out | 60 +------
.../test/results/clientpositive/llap/dec_str.q.out | 22 +--
.../llap/external_jdbc_table_perf.q.out | 6 +-
.../results/clientpositive/llap/filter_union.q.out | 61 ++++---
.../results/clientpositive/llap/mapjoin2.q.out | 58 ++++---
.../results/clientpositive/llap/mapjoin_hint.q.out | 154 ++++++++++-------
.../results/clientpositive/llap/mergejoin.q.out | 22 ++-
.../clientpositive/llap/multi_in_clause.q.out | 4 +-
.../results/clientpositive/llap/semijoin.q.out | 4 +-
.../results/clientpositive/llap/subquery_ALL.q.out | 4 +-
.../results/clientpositive/llap/subquery_ANY.q.out | 6 +-
.../clientpositive/llap/subquery_exists.q.out | 4 +-
.../clientpositive/llap/subquery_multi.q.out | 4 +-
.../clientpositive/llap/subquery_null_agg.q.out | 34 ++--
.../clientpositive/llap/subquery_scalar.q.out | 81 +++++----
.../clientpositive/llap/union_assertion_type.q.out | 77 +++++++--
ql/src/test/results/clientpositive/mapjoin2.q.out | 76 +++++----
.../test/results/clientpositive/masking_10.q.out | 74 ++++++--
ql/src/test/results/clientpositive/mergejoin.q.out | 22 ++-
.../clientpositive/optimize_filter_literal.q.out | 7 +-
.../clientpositive/partition_boolexpr.q.out | 8 +-
.../clientpositive/perf/tez/cbo_query10.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query14.q.out | 6 +-
.../clientpositive/perf/tez/cbo_query16.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query23.q.out | 4 +-
.../clientpositive/perf/tez/cbo_query35.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query69.q.out | 2 +-
.../clientpositive/perf/tez/cbo_query83.q.out | 6 +-
.../clientpositive/perf/tez/cbo_query94.q.out | 2 +-
.../perf/tez/constraints/cbo_query10.q.out | 2 +-
.../perf/tez/constraints/cbo_query16.q.out | 2 +-
.../perf/tez/constraints/cbo_query23.q.out | 4 +-
.../perf/tez/constraints/cbo_query35.q.out | 2 +-
.../perf/tez/constraints/cbo_query69.q.out | 2 +-
.../perf/tez/constraints/cbo_query83.q.out | 6 +-
.../perf/tez/constraints/cbo_query94.q.out | 2 +-
ql/src/test/results/clientpositive/plan_json.q.out | 2 +-
.../test/results/clientpositive/pointlookup.q.out | 4 +-
ql/src/test/results/clientpositive/ppd_join5.q.out | 111 ++++++------
.../test/results/clientpositive/ppd_udf_col.q.out | 118 +++++--------
.../clientpositive/remove_exprs_stats.q.out | 189 ++-------------------
.../clientpositive/spark/cbo_simple_select.q.out | 11 +-
.../clientpositive/spark/constprog_semijoin.q.out | 148 +---------------
.../results/clientpositive/spark/ppd_join5.q.out | 97 ++++++-----
.../results/clientpositive/spark/semijoin.q.out | 4 +-
.../clientpositive/spark/subquery_exists.q.out | 4 +-
.../clientpositive/spark/subquery_multi.q.out | 4 +-
.../clientpositive/spark/subquery_null_agg.q.out | 34 ++--
.../clientpositive/spark/subquery_scalar.q.out | 73 ++++----
.../results/clientpositive/subquery_exists.q.out | 4 +-
.../clientpositive/vector_outer_join3.q.out | 6 +-
.../clientpositive/vector_outer_join4.q.out | 6 +-
.../clientpositive/vector_outer_join6.q.out | 4 +-
103 files changed, 1210 insertions(+), 1724 deletions(-)
diff --git a/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out b/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out
index 960027e..0d5b50e 100644
--- a/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out
+++ b/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out
@@ -327,35 +327,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@accumulo_pushdown
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: accumulo_pushdown
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
index f6c5ea1..01efee4 100644
--- a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out
@@ -327,35 +327,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@hbase_pushdown
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
index 963acc1..2642bda 100644
--- a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
+++ b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out
@@ -216,35 +216,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@hbase_pushdown
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hbase_pushdown
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
index f4e9f9a..183f456 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
@@ -528,7 +528,6 @@ public abstract class BaseJdbcWithMiniLlap {
}
InputSplit[] splits = inputFormat.getSplits(job, numSplits);
- assertTrue(splits.length > 0);
// Fetch rows from splits
boolean first = true;
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java
index e2884d1..31848c5 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestNewGetSplitsFormat.java
@@ -79,7 +79,11 @@ public class TestNewGetSplitsFormat extends BaseJdbcWithMiniLlap {
}
InputSplit[] splits = inputFormat.getSplits(job, numSplits);
- assertTrue(splits.length > 2);
+
+ if (splits.length <= 1) {
+ return 0;
+ }
+
// populate actual splits with schema and planBytes[]
LlapInputSplit schemaSplit = (LlapInputSplit) splits[0];
diff --git a/pom.xml b/pom.xml
index 6dbff13..59545fe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,7 +127,7 @@
<avatica.version>1.12.0</avatica.version>
<avro.version>1.8.2</avro.version>
<bonecp.version>0.8.0.RELEASE</bonecp.version>
- <calcite.version>1.19.0</calcite.version>
+ <calcite.version>1.21.0</calcite.version>
<datanucleus-api-jdo.version>4.2.4</datanucleus-api-jdo.version>
<datanucleus-core.version>4.1.17</datanucleus-core.version>
<datanucleus-rdbms.version>4.1.19</datanucleus-rdbms.version>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 653a3c1..c1ab64c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMaxRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity;
@@ -83,6 +84,7 @@ public class HiveDefaultRelMetadataProvider {
new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(),
HiveRelMdSelectivity.SOURCE,
HiveRelMdRuntimeRowCount.SOURCE,
+ HiveRelMdMaxRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
@@ -154,6 +156,7 @@ public class HiveDefaultRelMetadataProvider {
new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(),
HiveRelMdSelectivity.SOURCE,
HiveRelMdRowCount.SOURCE,
+ HiveRelMdMaxRowCount.SOURCE,
HiveRelMdUniqueKeys.SOURCE,
HiveRelMdColumnUniqueness.SOURCE,
HiveRelMdSize.SOURCE,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
index d96b1dc..d50c517 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelFactories.java
@@ -124,7 +124,7 @@ public class HiveRelFactories {
*/
private static class HiveFilterFactoryImpl implements FilterFactory {
@Override
- public RelNode createFilter(RelNode child, RexNode condition) {
+ public RelNode createFilter(RelNode child, RexNode condition, Set<CorrelationId> variablesSet) {
RelOptCluster cluster = child.getCluster();
HiveFilter filter = new HiveFilter(cluster, TraitsUtil.getDefaultTraitSet(cluster), child, condition);
return filter;
@@ -153,6 +153,11 @@ public class HiveRelFactories {
@Override
public RelNode createJoin(RelNode left, RelNode right, RexNode condition, JoinRelType joinType,
Set<String> variablesStopped, boolean semiJoinDone) {
+ if (joinType == JoinRelType.SEMI) {
+ final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
+ final RelOptCluster cluster = left.getCluster();
+ return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition);
+ }
return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType);
}
@@ -161,6 +166,11 @@ public class HiveRelFactories {
Set<CorrelationId> variablesSet, JoinRelType joinType, boolean semiJoinDone) {
// According to calcite, it is going to be removed before Calcite-2.0
// TODO: to handle CorrelationId
+ if (joinType == JoinRelType.SEMI) {
+ final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
+ final RelOptCluster cluster = left.getCluster();
+ return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition);
+ }
return HiveJoin.getJoin(left.getCluster(), left, right, condition, joinType);
}
}
@@ -176,8 +186,7 @@ public class HiveRelFactories {
RexNode condition) {
final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
final RelOptCluster cluster = left.getCluster();
- return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition,
- joinInfo.leftKeys, joinInfo.rightKeys);
+ return HiveSemiJoin.getSemiJoin(cluster, left.getTraitSet(), left, right, condition);
}
}
@@ -197,15 +206,11 @@ public class HiveRelFactories {
private static class HiveAggregateFactoryImpl implements AggregateFactory {
@Override
- public RelNode createAggregate(RelNode child, boolean indicator,
+ public RelNode createAggregate(RelNode child,
ImmutableBitSet groupSet, ImmutableList<ImmutableBitSet> groupSets,
List<AggregateCall> aggCalls) {
- if (indicator) {
- throw new IllegalStateException("Hive does not support indicator columns but Calcite "
- + "created an Aggregate operator containing them");
- }
- return new HiveAggregate(child.getCluster(), child.getTraitSet(), child,
- groupSet, groupSets, aggCalls);
+ return new HiveAggregate(child.getCluster(), child.getTraitSet(), child,
+ groupSet, groupSets, aggCalls);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index b8380d6..d215736a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -741,7 +741,7 @@ public class HiveRelOptUtil extends RelOptUtil {
final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() : join.getLeft();
final RewritablePKFKJoinInfo nonRewritable = RewritablePKFKJoinInfo.of(false, null);
- if (joinType != JoinRelType.INNER) {
+ if (joinType != JoinRelType.INNER && !join.isSemiJoin()) {
// If it is not an inner, we transform it as the metadata
// providers for expressions do not pull information through
// outer join (as it would not be correct)
@@ -848,7 +848,7 @@ public class HiveRelOptUtil extends RelOptUtil {
if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) &&
ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) {
if (foreignKeyColumnType.isNullable()) {
- if (joinType == JoinRelType.INNER) {
+ if (joinType == JoinRelType.INNER || join.isSemiJoin()) {
// If it is nullable and it is an INNER, we just need a IS NOT NULL filter
RexNode originalCondOp = refToRex.get(foreignKeyColumnRef);
assert originalCondOp != null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index 05d1dc6..dfe2913 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -50,7 +50,6 @@ import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.server.CalciteServerStatement;
-import org.apache.calcite.sql.SemiJoinType;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -1141,11 +1140,10 @@ public class HiveSubQRemoveRelBuilder {
}
if(createSemiJoin) {
join = correlateFactory.createCorrelate(left.rel, right.rel, id,
- requiredColumns, SemiJoinType.SEMI);
+ requiredColumns, JoinRelType.SEMI);
} else {
join = correlateFactory.createCorrelate(left.rel, right.rel, id,
- requiredColumns, SemiJoinType.of(joinType));
-
+ requiredColumns, joinType);
}
} else {
join = joinFactory.createJoin(left.rel, right.rel, condition,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
index 50466e0..6b841a5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -53,14 +53,9 @@ public class HiveAggregate extends Aggregate implements HiveRelNode {
@Override
public Aggregate copy(RelTraitSet traitSet, RelNode input,
- boolean indicator, ImmutableBitSet groupSet,
+ ImmutableBitSet groupSet,
List<ImmutableBitSet> groupSets, List<AggregateCall> aggCalls) {
- if (indicator) {
- throw new IllegalStateException("Hive does not support indicator columns but tried "
- + "to create an Aggregate operator containing them");
- }
- return new HiveAggregate(getCluster(), traitSet, input,
- groupSet, groupSets, aggCalls);
+ return new HiveAggregate(getCluster(), traitSet, input, groupSet, groupSets, aggCalls);
}
@Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
index 7a8cf0a..0692194 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveMultiJoin.java
@@ -225,7 +225,7 @@ public final class HiveMultiJoin extends AbstractRelNode {
private boolean containsOuter() {
for (JoinRelType joinType : joinTypes) {
- if (joinType != JoinRelType.INNER) {
+ if (joinType.isOuterJoin()) {
return true;
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
index d70ead4..1bdfea3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSemiJoin.java
@@ -24,9 +24,9 @@ import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.InvalidRelException;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.ImmutableIntList;
@@ -35,8 +35,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Sets;
-public class HiveSemiJoin extends SemiJoin implements HiveRelNode {
+public class HiveSemiJoin extends Join implements HiveRelNode {
private final RexNode joinFilter;
@@ -46,12 +47,9 @@ public class HiveSemiJoin extends SemiJoin implements HiveRelNode {
RelTraitSet traitSet,
RelNode left,
RelNode right,
- RexNode condition,
- ImmutableIntList leftKeys,
- ImmutableIntList rightKeys) {
+ RexNode condition) {
try {
- HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right,
- condition, leftKeys, rightKeys);
+ HiveSemiJoin semiJoin = new HiveSemiJoin(cluster, traitSet, left, right, condition);
return semiJoin;
} catch (InvalidRelException | CalciteSemanticException e) {
throw new RuntimeException(e);
@@ -62,10 +60,8 @@ public class HiveSemiJoin extends SemiJoin implements HiveRelNode {
RelTraitSet traitSet,
RelNode left,
RelNode right,
- RexNode condition,
- ImmutableIntList leftKeys,
- ImmutableIntList rightKeys) throws InvalidRelException, CalciteSemanticException {
- super(cluster, traitSet, left, right, condition, leftKeys, rightKeys);
+ RexNode condition) throws InvalidRelException, CalciteSemanticException {
+ super(cluster, traitSet, left, right, condition, JoinRelType.SEMI, Sets.newHashSet());
final List<RelDataTypeField> systemFieldList = ImmutableList.of();
List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
List<Integer> filterNulls = new ArrayList<Integer>();
@@ -81,12 +77,11 @@ public class HiveSemiJoin extends SemiJoin implements HiveRelNode {
}
@Override
- public SemiJoin copy(RelTraitSet traitSet, RexNode condition,
+ public HiveSemiJoin copy(RelTraitSet traitSet, RexNode condition,
RelNode left, RelNode right, JoinRelType joinType, boolean semiJoinDone) {
try {
final JoinInfo joinInfo = JoinInfo.of(left, right, condition);
- HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition,
- joinInfo.leftKeys, joinInfo.rightKeys);
+ HiveSemiJoin semijoin = new HiveSemiJoin(getCluster(), traitSet, left, right, condition);
// If available, copy state to registry for optimization rules
HiveRulesRegistry registry = semijoin.getCluster().getPlanner().getContext().unwrap(HiveRulesRegistry.class);
if (registry != null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index b9409cd..8edd0b0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -98,7 +98,7 @@ public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule {
}
}
- // If it is not an inner join, we do not push the
+ // If it is not an inner join or a semi-join, we do not push the
// aggregate operator
if (join.getJoinType() != JoinRelType.INNER) {
return;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
index 4e66de3..b2ff255 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java
@@ -91,8 +91,8 @@ public final class HiveJoinAddNotNullRule extends RelOptRule {
Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
- boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER;
- boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER;
+ boolean genPredOnLeft = join.getJoinType() == JoinRelType.RIGHT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin();
+ boolean genPredOnRight = join.getJoinType() == JoinRelType.LEFT || join.getJoinType() == JoinRelType.INNER || join.isSemiJoin();
RexNode newLeftPredicate = getNewPredicate(join, registry, joinPredInfo, leftPushedPredicates, genPredOnLeft, 0);
RexNode newRightPredicate = getNewPredicate(join, registry, joinPredInfo, rightPushedPredicates, genPredOnRight, 1);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
index c735df8..a657d13 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
@@ -117,7 +117,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
// These boolean values represent corresponding left, right input which is potential FK
boolean leftInputPotentialFK = topRefs.intersects(leftBits);
boolean rightInputPotentialFK = topRefs.intersects(rightBits);
- if (leftInputPotentialFK && rightInputPotentialFK && joinType == JoinRelType.INNER) {
+ if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
// Both inputs are referenced. Before making a decision, try to swap
// references in join condition if it is an inner join, i.e. if a join
// condition column is referenced above the join, then we can just
@@ -198,6 +198,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
final Mode mode;
switch (joinType) {
+ case SEMI:
case INNER:
if (leftInputPotentialFK && rightInputPotentialFK) {
// Bails out as it references columns from both sides (or no columns)
@@ -283,4 +284,4 @@ public class HiveJoinConstraintsRule extends RelOptRule {
// Transforms LEFT/RIGHT outer join into INNER join
TRANSFORM
}
-}
\ No newline at end of file
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
index 38759c0..545255c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectJoinTransposeRule.java
@@ -24,7 +24,6 @@ import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.rules.PushProjector;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexNode;
@@ -77,7 +76,7 @@ public class HiveProjectJoinTransposeRule extends RelOptRule {
Project origProj = call.rel(0);
final Join join = call.rel(1);
- if (join instanceof SemiJoin) {
+ if (join.isSemiJoin()) {
return; // TODO: support SemiJoin
}
// locate all fields referenced in the projection and join condition;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 86b7914..068b687 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -76,7 +76,6 @@ import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
-import org.apache.calcite.sql.SemiJoinType;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -503,9 +502,6 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
* @param rel Aggregate to rewrite
*/
public Frame decorrelateRel(Aggregate rel) throws SemanticException{
- if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
- throw new AssertionError(Bug.CALCITE_461_FIXED);
- }
//
// Rewrite logic:
//
@@ -688,9 +684,6 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
}
public Frame decorrelateRel(HiveAggregate rel) throws SemanticException{
- if (rel.getGroupType() != Aggregate.Group.SIMPLE) {
- throw new AssertionError(Bug.CALCITE_461_FIXED);
- }
//
// Rewrite logic:
//
@@ -1248,7 +1241,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
}
if(oldInput instanceof LogicalCorrelate
- && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI
&& !cm.mapRefRelToCorRef.containsKey(rel)) {
// this conditions need to be pushed into semi-join since this condition
// corresponds to IN
@@ -1261,7 +1254,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
RexUtil.composeConjunction(rexBuilder, conditions, false);
RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(),
- join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys());
+ join.getLeft(), join.getRight(), condition);
return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
}
@@ -1320,7 +1313,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
}
if(oldInput instanceof LogicalCorrelate
- && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && ((LogicalCorrelate) oldInput).getJoinType() == JoinRelType.SEMI
&& !cm.mapRefRelToCorRef.containsKey(rel)) {
// this conditions need to be pushed into semi-join since this condition
// corresponds to IN
@@ -1332,7 +1325,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
final RexNode condition =
RexUtil.composeConjunction(rexBuilder, conditions, false);
RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(),
- join.getLeft(), join.getRight(), condition, join.getLeftKeys(), join.getRightKeys());
+ join.getLeft(), join.getRight(), condition);
return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
}
@@ -1455,14 +1448,13 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
RelNode newJoin = null;
// this indicates original query was either correlated EXISTS or IN
- if(rel.getJoinType() == SemiJoinType.SEMI) {
+ if(rel.getJoinType() == JoinRelType.SEMI) {
final List<Integer> leftKeys = new ArrayList<Integer>();
final List<Integer> rightKeys = new ArrayList<Integer>();
RelNode[] inputRels = new RelNode[] {leftFrame.r, rightFrame.r};
newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(),
- rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r,
- condition, ImmutableIntList.copyOf(leftKeys), ImmutableIntList.copyOf(rightKeys));
+ rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), leftFrame.r, rightFrame.r, condition);
} else {
// Right input positions are shifted by newLeftFieldCount.
@@ -1473,7 +1465,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
}
newJoin = relBuilder.push(leftFrame.r).push(rightFrame.r)
- .join(rel.getJoinType().toJoinType(), condition).build();
+ .join(rel.getJoinType(), condition).build();
}
valueGen.pop();
@@ -1720,7 +1712,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
Project project,
Set<Integer> isCount) {
final RelNode left = correlate.getLeft();
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// now create the new project
final List<Pair<RexNode, String>> newProjects = Lists.newArrayList();
@@ -2258,10 +2250,10 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
// Aggregate (groupby (0) single_value())
// Project-A (may reference coVar)
// RightInputRel
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
@@ -2470,11 +2462,11 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
return;
}
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- final JoinRelType joinType = correlate.getJoinType().toJoinType();
+ final JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
RexNode joinCond = rexBuilder.makeLiteral(true);
@@ -2877,11 +2869,11 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
return;
}
- if(correlate.getJoinType() != SemiJoinType.LEFT) {
+ if(correlate.getJoinType() != JoinRelType.LEFT) {
return;
}
- JoinRelType joinType = correlate.getJoinType().toJoinType();
+ JoinRelType joinType = correlate.getJoinType();
// corRel.getCondition was here, however Correlate was updated so it
// never includes a join condition. The code was not modified for brevity.
RexNode joinCond = rexBuilder.makeLiteral(true);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
index 4992e70..82704a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveGBYSemiJoinRule.java
@@ -51,9 +51,8 @@ public class HiveRemoveGBYSemiJoinRule extends RelOptRule {
@Override public void onMatch(RelOptRuleCall call) {
final HiveSemiJoin semijoin= call.rel(0);
- if(semijoin.getJoinType() != JoinRelType.INNER) {
- return;
- }
+ assert semijoin.getJoinType() == JoinRelType.SEMI;
+
final RelNode left = call.rel(1);
final Aggregate rightAggregate= call.rel(2);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCSortPushDownRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCSortPushDownRule.java
index 79c6c12..7f683b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCSortPushDownRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/jdbc/JDBCSortPushDownRule.java
@@ -19,13 +19,18 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules.jdbc;
import java.util.Arrays;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilterRule;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSortRule;
import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
import org.slf4j.Logger;
@@ -68,15 +73,21 @@ public class JDBCSortPushDownRule extends RelOptRule {
final HiveSortLimit sort = call.rel(0);
final HiveJdbcConverter converter = call.rel(1);
- JdbcSort jdbcSort = new JdbcSort(
- sort.getCluster(),
- sort.getTraitSet().replace(converter.getJdbcConvention()),
- converter.getInput(),
- sort.getCollation(),
- sort.offset,
- sort.fetch);
+ RelNode node = (sort.fetch != null && RexLiteral.intValue(sort.fetch) == 0)
+ ? new JdbcFilter(
+ sort.getCluster(),
+ sort.getTraitSet().replace(converter.getJdbcConvention()),
+ converter.getInput(),
+ call.builder().literal(false))
+ : new JdbcSort(
+ sort.getCluster(),
+ sort.getTraitSet().replace(converter.getJdbcConvention()),
+ converter.getInput(),
+ sort.getCollation(),
+ sort.offset,
+ sort.fetch);
- call.transformTo(converter.copy(converter.getTraitSet(), jdbcSort));
+ call.transformTo(converter.copy(converter.getTraitSet(), node));
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
index b2b2f3c..e99048b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
@@ -25,7 +25,6 @@ import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
@@ -41,6 +40,7 @@ import org.apache.calcite.util.NumberUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -82,22 +82,15 @@ public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
.getCluster().getRexBuilder().makeLiteral(true));
}
- @Override
- public Double getDistinctRowCount(Join rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
+ public Double getDistinctRowCount(HiveSemiJoin rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
RexNode predicate) {
- if (rel instanceof HiveJoin) {
- HiveJoin hjRel = (HiveJoin) rel;
- //TODO: Improve this
- if (rel instanceof SemiJoin) {
- return mq.getDistinctRowCount(hjRel.getLeft(), groupKey,
- rel.getCluster().getRexBuilder().makeLiteral(true));
- } else {
- return getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
- groupKey, predicate, true);
- }
- }
+ return super.getDistinctRowCount(rel, mq, groupKey, predicate);
+ }
- return mq.getDistinctRowCount(rel, groupKey, predicate);
+ public Double getDistinctRowCount(HiveJoin rel, RelMetadataQuery mq, ImmutableBitSet groupKey,
+ RexNode predicate) {
+ return getJoinDistinctRowCount(mq, rel, rel.getJoinType(),
+ groupKey, predicate, true);
}
/**
@@ -116,7 +109,7 @@ public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
* otherwise use <code>left NDV * right NDV</code>.
* @return number of distinct rows
*/
- public static Double getJoinDistinctRowCount(RelMetadataQuery mq,
+ private static Double getJoinDistinctRowCount(RelMetadataQuery mq,
RelNode joinRel, JoinRelType joinType, ImmutableBitSet groupKey,
RexNode predicate, boolean useMaxNdv) {
Double distRowCount = null;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java
new file mode 100644
index 0000000..b45d765
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* TODO: CALCITE-2991 created some optimizations. This file bypasses
+ the change for now (see HIVE-22408)
+*/
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
+import org.apache.calcite.rel.metadata.RelMdUtil;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMdMaxRowCount;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.BuiltInMethod;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.NumberUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveRelMdMaxRowCount extends RelMdMaxRowCount {
+
+ private static final HiveRelMdMaxRowCount INSTANCE =
+ new HiveRelMdMaxRowCount();
+
+ public static final RelMetadataProvider SOURCE =
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ ReflectiveRelMetadataProvider.reflectiveSource(
+ BuiltInMethod.MAX_ROW_COUNT.method, new HiveRelMdMaxRowCount()),
+ RelMdMaxRowCount.SOURCE));
+
+ private HiveRelMdMaxRowCount() {
+ super();
+ }
+
+ @Override
+ public Double getMaxRowCount(Aggregate rel, RelMetadataQuery mq) {
+ if (rel.getGroupSet().isEmpty()) {
+ // Aggregate with no GROUP BY always returns 1 row (even on empty table).
+ return 1D;
+ }
+
+ final Double rowCount = mq.getMaxRowCount(rel.getInput());
+ if (rowCount == null) {
+ return null;
+ }
+ return rowCount * rel.getGroupSets().size();
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index a137bdf..0527e2b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -39,7 +39,6 @@ import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Union;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
@@ -316,7 +315,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred
public JoinConditionBasedPredicateInference(Join joinRel,
RexNode lPreds, RexNode rPreds) {
- this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds);
+ this(joinRel, ((Join) joinRel).isSemiJoin(), lPreds, rPreds);
}
private JoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin,
@@ -416,6 +415,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred
switch (joinType) {
case INNER:
case LEFT:
+ case SEMI:
infer(leftPreds, allExprsDigests, inferredPredicates,
nonFieldsPredicates, includeEqualityInference,
joinType == JoinRelType.LEFT ? rightFieldsBitSet
@@ -425,6 +425,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred
switch (joinType) {
case INNER:
case RIGHT:
+ case SEMI:
infer(rightPreds, allExprsDigests, inferredPredicates,
nonFieldsPredicates, includeEqualityInference,
joinType == JoinRelType.RIGHT ? leftFieldsBitSet
@@ -453,7 +454,7 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred
}
}
- if (joinType == JoinRelType.INNER && !nonFieldsPredicates.isEmpty()) {
+ if ((joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI) && !nonFieldsPredicates.isEmpty()) {
// Predicates without field references can be pushed to both inputs
final Set<String> leftPredsSet = new HashSet<String>(
Lists.transform(leftPreds, HiveCalciteUtil.REX_STR_FN));
@@ -471,15 +472,13 @@ public class HiveRelMdPredicates implements MetadataHandler<BuiltInMetadata.Pred
switch (joinType) {
case INNER:
- Iterable<RexNode> pulledUpPredicates;
- if (isSemiJoin) {
- pulledUpPredicates = Iterables.concat(leftPreds, leftInferredPredicates);
- } else {
- pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
+ Iterable<RexNode> pulledUpPredicates = Iterables.concat(leftPreds, rightPreds,
RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates);
- }
return RelOptPredicateList.of(rexBuilder,
pulledUpPredicates, leftInferredPredicates, rightInferredPredicates);
+ case SEMI:
+ return RelOptPredicateList.of(rexBuilder, Iterables.concat(leftPreds, leftInferredPredicates),
+ leftInferredPredicates, rightInferredPredicates);
case LEFT:
return RelOptPredicateList.of(rexBuilder,
leftPreds, EMPTY_LIST, rightInferredPredicates);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
index d881fbd..82e4cc1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java
@@ -30,7 +30,6 @@ import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
@@ -54,6 +53,8 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.PKFKJoinInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -73,8 +74,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
super();
}
- @Override
- public Double getRowCount(Join join, RelMetadataQuery mq) {
+ public Double getRowCount(HiveJoin join, RelMetadataQuery mq) {
// Try to infer from constraints first
final Pair<PKFKRelationInfo, RexNode> constraintBasedResult =
constraintsBasedAnalyzeJoinForPKFK(join, mq);
@@ -117,8 +117,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
return rowCount;
}
- @Override
- public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) {
+ public Double getRowCount(HiveSemiJoin rel, RelMetadataQuery mq) {
PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq);
if (pkfk != null) {
double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor;
@@ -249,7 +248,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
// @todo: remove this. 8/28/14 hb
// for now adding because RelOptUtil.classifyFilters has an assertion about
// column counts that is not true for semiJoins.
- if (joinRel instanceof SemiJoin) {
+ if (joinRel.isSemiJoin()) {
return null;
}
@@ -356,7 +355,7 @@ public class HiveRelMdRowCount extends RelMdRowCount {
*/
public static Pair<PKFKRelationInfo, RexNode> constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) {
- if (join instanceof SemiJoin) {
+ if (join.isSemiJoin()) {
// TODO: Support semijoin
return null;
}
@@ -391,9 +390,9 @@ public class HiveRelMdRowCount extends RelMdRowCount {
return null;
}
- boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT)
+ boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.RIGHT)
&& leftInputResult.isPkFkJoin;
- boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT)
+ boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.isSemiJoin() || join.getJoinType() == JoinRelType.LEFT)
&& rightInputResult.isPkFkJoin;
if (!leftIsKey && !rightIsKey) {
// Nothing to do here, bail out
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
index f6a6cf4..1724ab1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java
@@ -25,7 +25,6 @@ import java.util.Set;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdSelectivity;
import org.apache.calcite.rel.metadata.RelMdUtil;
@@ -65,7 +64,7 @@ public class HiveRelMdSelectivity extends RelMdSelectivity {
}
public Double getSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
- if (j.getJoinType().equals(JoinRelType.INNER)) {
+ if (j.getJoinType().equals(JoinRelType.INNER) || j.isSemiJoin()) {
return computeInnerJoinSelectivity(j, mq, predicate);
} else if (j.getJoinType().equals(JoinRelType.LEFT) ||
j.getJoinType().equals(JoinRelType.RIGHT)) {
@@ -143,7 +142,7 @@ public class HiveRelMdSelectivity extends RelMdSelectivity {
ndvEstimate = exponentialBackoff(peLst, colStatMap);
}
- if (j instanceof SemiJoin) {
+ if (j.isSemiJoin()) {
ndvEstimate = Math.min(mq.getRowCount(j.getLeft()),
ndvEstimate);
} else if (j instanceof HiveJoin) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
index 75a903c..eebeb4c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
import java.util.List;
import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMdSize;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -31,6 +30,7 @@ import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableNullableList;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -87,8 +87,7 @@ public class HiveRelMdSize extends RelMdSize {
return list.build();
}
- @Override
- public List<Double> averageColumnSizes(SemiJoin rel, RelMetadataQuery mq) {
+ public List<Double> averageColumnSizes(HiveSemiJoin rel, RelMetadataQuery mq) {
final RelNode left = rel.getLeft();
final List<Double> lefts =
mq.getAverageColumnSizes(left);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index c4c771e..7328b72 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -155,17 +155,15 @@ public class ASTBuilder {
return b.node();
}
- public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond,
- boolean semiJoin) {
+ public static ASTNode join(ASTNode left, ASTNode right, JoinRelType joinType, ASTNode cond) {
ASTBuilder b = null;
switch (joinType) {
+ case SEMI:
+ b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
+ break;
case INNER:
- if (semiJoin) {
- b = ASTBuilder.construct(HiveParser.TOK_LEFTSEMIJOIN, "TOK_LEFTSEMIJOIN");
- } else {
- b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN");
- }
+ b = ASTBuilder.construct(HiveParser.TOK_JOIN, "TOK_JOIN");
break;
case LEFT:
b = ASTBuilder.construct(HiveParser.TOK_LEFTOUTERJOIN, "TOK_LEFTOUTERJOIN");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 6c4edeb..718a2d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -36,7 +36,6 @@ import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableFunctionScan;
import org.apache.calcite.rel.core.TableScan;
@@ -378,7 +377,7 @@ public class ASTConverter {
QueryBlockInfo right = convertSource(join.getRight());
s = new Schema(left.schema, right.schema);
ASTNode cond = join.getCondition().accept(new RexVisitor(s, false, r.getCluster().getRexBuilder()));
- boolean semiJoin = join instanceof SemiJoin;
+ boolean semiJoin = join.isSemiJoin();
if (join.getRight() instanceof Join && !semiJoin) {
// should not be done for semijoin since it will change the semantics
// Invert join inputs; this is done because otherwise the SemanticAnalyzer
@@ -391,9 +390,9 @@ public class ASTConverter {
} else {
type = join.getJoinType();
}
- ast = ASTBuilder.join(right.ast, left.ast, type, cond, semiJoin);
+ ast = ASTBuilder.join(right.ast, left.ast, type, cond);
} else {
- ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond, semiJoin);
+ ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond);
}
if (semiJoin) {
s = left.schema;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index eb6d02c..c11ed59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -37,7 +37,6 @@ import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SemiJoin;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
@@ -175,8 +174,8 @@ public class HiveOpConverter {
return visit((HiveMultiJoin) rn);
} else if (rn instanceof HiveJoin) {
return visit((HiveJoin) rn);
- } else if (rn instanceof SemiJoin) {
- return visit((SemiJoin)rn);
+ } else if (rn instanceof HiveSemiJoin) {
+ return visit((HiveSemiJoin) rn);
} else if (rn instanceof HiveFilter) {
return visit((HiveFilter) rn);
} else if (rn instanceof HiveSortLimit) {
@@ -332,8 +331,7 @@ public class HiveOpConverter {
return translateJoin(joinRel);
}
-
- OpAttr visit(SemiJoin joinRel) throws SemanticException {
+ OpAttr visit(HiveSemiJoin joinRel) throws SemanticException {
return translateJoin(joinRel);
}
@@ -366,7 +364,7 @@ public class HiveOpConverter {
Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
if (joinRel instanceof HiveMultiJoin ||
- !(joinRel instanceof SemiJoin)) {
+ !((joinRel instanceof Join) && ((Join) joinRel).isSemiJoin())) {
int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
for (int i = 1; i < inputs.length; i++) {
newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
@@ -904,7 +902,7 @@ public class HiveOpConverter {
noOuterJoin = !hmj.isOuterJoin();
} else {
joinCondns = new JoinCondDesc[1];
- semiJoin = join instanceof SemiJoin;
+ semiJoin = (join instanceof Join) && ((Join) join).isSemiJoin();
JoinType joinType;
if (semiJoin) {
joinType = JoinType.LEFTSEMI;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 91ec00b..4762335 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2705,7 +2705,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
calciteJoinType = JoinRelType.FULL;
break;
case LEFTSEMI:
- calciteJoinType = JoinRelType.INNER;
+ calciteJoinType = JoinRelType.SEMI;
leftSemiJoin = true;
break;
case INNER:
@@ -2739,8 +2739,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
ImmutableList.of(remainingEquiCond, nonEquiConds), false) :
nonEquiConds;
topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
- inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys),
- ImmutableIntList.copyOf(rightKeys));
+ inputRels[0], inputRels[1], calciteJoinCond);
// Create join RR: we need to check whether we need to update left RR in case
// previous call to projectNonColumnEquiConditions updated it
diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out
index 0db7d34..7f548bd 100644
--- a/ql/src/test/results/clientpositive/acid_nullscan.q.out
+++ b/ql/src/test/results/clientpositive/acid_nullscan.q.out
@@ -38,7 +38,7 @@ POSTHOOK: Input: default@acid_vectorized_n1
#### A masked pattern was here ####
OPTIMIZED SQL: SELECT SUM(`a`) AS `$f0`
FROM `default`.`acid_vectorized_n1`
-WHERE FALSE
+LIMIT 0
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -51,26 +51,29 @@ STAGE PLANS:
alias: acid_vectorized_n1
Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(a)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col0 (type: bigint)
- auto parallelism: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col0 (type: bigint)
+ auto parallelism: false
Execution mode: vectorized
Path -> Alias:
- nullscan://null/default.acid_vectorized_n1/part_ [acid_vectorized_n1]
+ nullscan://null/default.acid_vectorized_n1/part_ [$hdt$_0:$hdt$_0:acid_vectorized_n1]
Path -> Partition:
nullscan://null/default.acid_vectorized_n1/part_
Partition
@@ -126,7 +129,7 @@ STAGE PLANS:
name: default.acid_vectorized_n1
name: default.acid_vectorized_n1
Truncated Path -> Alias:
- nullscan://null/default.acid_vectorized_n1/part_ [acid_vectorized_n1]
+ nullscan://null/default.acid_vectorized_n1/part_ [$hdt$_0:$hdt$_0:acid_vectorized_n1]
Needs Tagging: false
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index 588b04c..6f8fad3 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -457,35 +457,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -589,35 +566,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -630,35 +584,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -671,35 +602,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -754,35 +662,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -795,35 +680,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 05bfa34..31fa142 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -317,19 +317,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: loc_orc_n4
- Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: PARTIAL
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: PARTIAL
- ListSink
+ ListSink
PREHOOK: query: analyze table loc_orc_n4 partition(year='2001') compute statistics for columns state,locid
PREHOOK: type: ANALYZE_TABLE
diff --git a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
index e4cd76d..3ef5b0e 100644
--- a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
@@ -26,7 +26,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_n1
POSTHOOK: Lineage: tbl_n1.n SCRIPT []
POSTHOOK: Lineage: tbl_n1.t SCRIPT []
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: explain
select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -46,25 +46,32 @@ STAGE PLANS:
Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:tbl_n1
+ $hdt$_1:$hdt$_1:$hdt$_1:tbl_n1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:tbl_n1
+ $hdt$_1:$hdt$_1:$hdt$_1:tbl_n1
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- keys:
- 0
- 1
+ Filter Operator
+ predicate: (_col0 = 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
Stage: Stage-3
Map Reduce
@@ -109,7 +116,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
@@ -119,7 +126,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_n1
#### A masked pattern was here ####
1 one true true
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: explain
select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -139,28 +146,35 @@ STAGE PLANS:
Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:tbl_n1
+ $hdt$_0:$hdt$_0:$hdt$_0:tbl_n1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:tbl_n1
+ $hdt$_0:$hdt$_0:$hdt$_0:tbl_n1
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- filter predicates:
- 0
- 1 {true}
- keys:
- 0
- 1
+ Filter Operator
+ predicate: (_col0 = 2L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ HashTable Sink Operator
+ filter predicates:
+ 0
+ 1 {true}
+ keys:
+ 0
+ 1
Stage: Stage-3
Map Reduce
@@ -208,7 +222,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
diff --git a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
index 94590f1..5ca758e 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out
@@ -893,19 +893,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: cbo_t2
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: key, value, c_int, c_float, c_boolean, dt
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
diff --git a/ql/src/test/results/clientpositive/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
index bd84e0d..ce971f9 100644
--- a/ql/src/test/results/clientpositive/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/cbo_simple_select.q.out
@@ -893,19 +893,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: cbo_t2
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out
index a36379a..4442658 100644
--- a/ql/src/test/results/clientpositive/concat_op.q.out
+++ b/ql/src/test/results/clientpositive/concat_op.q.out
@@ -311,7 +311,7 @@ POSTHOOK: query: explain formatted select key || value from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
PREHOOK: query: explain formatted select key || value || key from src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -320,7 +320,7 @@ POSTHOOK: query: explain formatted select key || value || key from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
PREHOOK: query: explain formatted select key || value || key || value from src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -329,4 +329,4 @@ POSTHOOK: query: explain formatted select key || value || key || value from src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 9.624,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": [...]
diff --git a/ql/src/test/results/clientpositive/constprog3.q.out b/ql/src/test/results/clientpositive/constprog3.q.out
index 15fa434..a1a7ae8 100644
--- a/ql/src/test/results/clientpositive/constprog3.q.out
+++ b/ql/src/test/results/clientpositive/constprog3.q.out
@@ -42,28 +42,29 @@ STAGE PLANS:
TableScan
alias: table1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
+ Select Operator
+ expressions: id (type: int), val (type: int), val1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: id (type: int), val (type: int), val1 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
TableScan
alias: table3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ filterExpr: (id = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (id = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Join Operator
condition map:
@@ -72,10 +73,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 13 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 13 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
index e05bc13..cda99dd 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out
@@ -1348,7 +1348,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes timestamp
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
druid.query.type scan
Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -1532,7 +1532,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes string
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format((div(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),1000) * '1000'),'yyyy-MM-dd HH:mm:ss','UTC')","outputType":"STRING"}],"l [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format((div(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),1000) * '1000'),'yyy [...]
druid.query.type groupBy
Select Operator
expressions: vc (type: string)
@@ -1574,7 +1574,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes string
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1Y','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}], [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1Y','', [...]
druid.query.type groupBy
Select Operator
expressions: vc (type: string)
@@ -1629,7 +1629,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes string
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1M','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}], [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1M','', [...]
druid.query.type groupBy
Select Operator
expressions: vc (type: string)
@@ -1684,7 +1684,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes string
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P3M','','UTC'),'yyyy-MM-dd','UTC')","outputType":"STRING"}], [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"STRING"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P3M','', [...]
druid.query.type groupBy
Select Operator
expressions: vc (type: string)
@@ -1773,7 +1773,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,$f1
druid.fieldTypes date,double
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1M','','UTC'),'yyyy-MM-dd','U [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u [...]
druid.query.type groupBy
Select Operator
expressions: $f1 (type: double), vc (type: date)
@@ -1822,7 +1822,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0,vc1,vc2
druid.fieldTypes date,date,date,int
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_shift(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'P1D',CAST((\"cdouble\" / 1000), 'LONG'),'UTC')","outputType":"LONG"},{"type":"express [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_shift(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1D','','UTC'),'P1D',CAST((\"cdouble\" / 1000), ' [...]
druid.query.type scan
Statistics: Num rows: 9173 Data size: 1499152 Basic stats: COMPLETE Column stats: NONE
Select Operator
@@ -2367,7 +2367,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc
druid.fieldTypes timestamp
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC')","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"}
druid.query.type scan
Statistics: Num rows: 9173 Data size: 348640 Basic stats: COMPLETE Column stats: NONE
Select Operator
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index 475a40e..7b80b05 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -507,7 +507,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0,vc1
druid.fieldTypes int,bigint,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc", [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 31)"} [...]
druid.query.type scan
Select Operator
expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string)
@@ -589,7 +589,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0,vc1
druid.fieldTypes double,int,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M"," [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M"," [...]
druid.query.type scan
Select Operator
expressions: vc (type: double), vc0 (type: int), vc1 (type: string)
@@ -671,7 +671,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0
druid.fieldTypes int,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969-01-01T08:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pa [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969-01-01T08:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pa [...]
druid.query.type scan
Select Operator
expressions: vc (type: int), vc0 (type: string)
@@ -748,7 +748,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,$f1
druid.fieldTypes date,double
- druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC')","outputType":"LONG"}],"limitSpec":{"type":"default","limit":5," [...]
+ druid.query.json {"queryType":"groupBy","dataSource":"default.druid_table_alltypesorc","granularity":"all","dimensions":[{"type":"default","dimension":"vc","outputName":"vc","outputType":"LONG"}],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1D','','UTC')","outputType [...]
druid.query.type groupBy
Select Operator
expressions: vc (type: date), $f1 (type: double)
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
index aced136..9502b52 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_floorTime.q.out
@@ -508,7 +508,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0,vc1
druid.fieldTypes int,bigint,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 31)"},"virtualColumns":[{"type":"expression","name":"vc", [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"expression","expression":"(CAST(substring(timestamp_format(timestamp_floor(timestamp_parse(timestamp_format(\"__time\",'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','US/Pacific'),'yyyy-MM-dd\\u0027T\\u0027HH:mm:ss.SSS\\u0027Z\\u0027','UTC'),'P1D','','UTC'),'yyyy-MM-dd','UTC'), 8, 2), 'DOUBLE') == 31)"} [...]
druid.query.type scan
Select Operator
expressions: vc (type: int), vc0 (type: bigint), vc1 (type: string)
@@ -590,7 +590,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0,vc1
druid.fieldTypes double,int,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M"," [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"expression","expression":"(((CAST(timestamp_extract(\"__time\",'MONTH','US/Pacific'), 'DOUBLE') / 4) + 1) == 4)"},{"type":"bound","dimension":"__time","lower":"11","lowerStrict":false,"upper":"12","upperStrict":false,"ordering":"numeric","extractionFn":{"type":"timeFormat","format":"M"," [...]
druid.query.type scan
Select Operator
expressions: vc (type: double), vc0 (type: int), vc1 (type: string)
@@ -672,7 +672,7 @@ STAGE PLANS:
properties:
druid.fieldNames vc,vc0
druid.fieldTypes int,string
- druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969-01-01T08:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pa [...]
+ druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"1969-01-01T08:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"expression","expression":"(CAST(timestamp_extract(\"__time\",'YEAR','US/Pa [...]
druid.query.type scan
Select Operator
expressions: vc (type: int), vc0 (type: string)
diff --git a/ql/src/test/results/clientpositive/filter_union.q.out b/ql/src/test/results/clientpositive/filter_union.q.out
index 29a8816..088e71d 100644
--- a/ql/src/test/results/clientpositive/filter_union.q.out
+++ b/ql/src/test/results/clientpositive/filter_union.q.out
@@ -586,21 +586,22 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(key)
keys: key (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Reduce Operator Tree:
@@ -609,18 +610,21 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/fold_case.q.out b/ql/src/test/results/clientpositive/fold_case.q.out
index 53d19c5..6577bb2 100644
--- a/ql/src/test/results/clientpositive/fold_case.q.out
+++ b/ql/src/test/results/clientpositive/fold_case.q.out
@@ -137,8 +137,8 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -272,8 +272,8 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -419,39 +419,26 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: null (type: void)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: null (type: void)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
PREHOOK: query: explain
select count(1) from src where (case key when '238' then null else 1=1 end)
diff --git a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
index 006b51e..e3c483b 100644
--- a/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
+++ b/ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out
@@ -227,35 +227,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/fold_when.q.out b/ql/src/test/results/clientpositive/fold_when.q.out
index 2ff8595..8753d9f 100644
--- a/ql/src/test/results/clientpositive/fold_when.q.out
+++ b/ql/src/test/results/clientpositive/fold_when.q.out
@@ -9,35 +9,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -52,35 +29,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -122,35 +76,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -280,35 +211,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/infer_const_type.q.out b/ql/src/test/results/clientpositive/infer_const_type.q.out
index 9e48dbe..aacc329 100644
--- a/ql/src/test/results/clientpositive/infer_const_type.q.out
+++ b/ql/src/test/results/clientpositive/infer_const_type.q.out
@@ -129,35 +129,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@infertypes
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: infertypes
- Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ti (type: tinyint), si (type: smallint), i (type: int), bi (type: bigint), fl (type: float), db (type: double), str (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/infer_join_preds.q.out b/ql/src/test/results/clientpositive/infer_join_preds.q.out
index 9b8b038..06ace97 100644
--- a/ql/src/test/results/clientpositive/infer_join_preds.q.out
+++ b/ql/src/test/results/clientpositive/infer_join_preds.q.out
@@ -151,34 +151,38 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
TableScan
alias: src1
+ filterExpr: key is not null (type: boolean)
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Reduce Operator Tree:
Join Operator
@@ -248,12 +252,12 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
@@ -346,19 +350,22 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
TableScan
alias: src1
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
@@ -465,12 +472,12 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
@@ -601,12 +608,12 @@ STAGE PLANS:
TableScan
alias: src1
Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
diff --git a/ql/src/test/results/clientpositive/input9.q.out b/ql/src/test/results/clientpositive/input9.q.out
index 1a36b07..a055b8a 100644
--- a/ql/src/test/results/clientpositive/input9.q.out
+++ b/ql/src/test/results/clientpositive/input9.q.out
@@ -35,35 +35,39 @@ STAGE PLANS:
TableScan
alias: src1
Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: null (type: string), UDFToInteger(key) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1_n159
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: string), _col1 (type: int)
- outputColumnNames: value, key
+ expressions: null (type: string), UDFToInteger(_col0) (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll')
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1_n159
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: value, key
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll')
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
diff --git a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
index d4f89bb..ef217fd 100644
--- a/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketpruning1.q.out
@@ -1542,58 +1542,14 @@ POSTHOOK: Input: default@srcbucket_pruned
#### A masked pattern was here ####
OPTIMIZED SQL: SELECT `key`, `value`, `ds`
FROM `default`.`srcbucket_pruned`
-WHERE FALSE
+LIMIT 0
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: srcbucket_pruned
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: key (type: int), value (type: string), ds (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- columns _col0,_col1,_col2
- columns.types int:string:string
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Execution mode: vectorized, llap
- LLAP IO: unknown
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
index 8f0ba6c..3a5953f 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out
@@ -845,16 +845,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: cbo_t2
- Filter Operator
- predicate: false (type: boolean)
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index da8cae3..e377bb0 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -280,37 +280,9 @@ POSTHOOK: Input: default@table3_n0
#### A masked pattern was here ####
Plan optimized by CBO.
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
-
Stage-0
Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 llap
- File Output Operator [FS_13]
- Merge Join Operator [MERGEJOIN_18] (rows=1 width=185)
- Conds:RS_21.100, true=RS_25._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
- <-Map 1 [SIMPLE_EDGE] vectorized, llap
- SHUFFLE [RS_21]
- PartitionCols:100, true
- Select Operator [SEL_20] (rows=1 width=193)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_19] (rows=1 width=185)
- predicate:false
- TableScan [TS_0] (rows=10 width=185)
- default@table1_n10,table1_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"]
- <-Map 3 [SIMPLE_EDGE] vectorized, llap
- SHUFFLE [RS_25]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_24] (rows=1 width=8)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_23] (rows=1 width=8)
- Output:["_col0","_col1"]
- Filter Operator [FIL_22] (rows=1 width=8)
- predicate:false
- TableScan [TS_3] (rows=5 width=3)
- default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE
+ limit:0
PREHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id and table3_n0.id = 100 where table1_n10.dimid <> 100
PREHOOK: type: QUERY
@@ -390,37 +362,9 @@ POSTHOOK: Input: default@table3_n0
#### A masked pattern was here ####
Plan optimized by CBO.
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
-
Stage-0
Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 llap
- File Output Operator [FS_13]
- Merge Join Operator [MERGEJOIN_18] (rows=1 width=185)
- Conds:RS_21.100, true=RS_25._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
- <-Map 1 [SIMPLE_EDGE] vectorized, llap
- SHUFFLE [RS_21]
- PartitionCols:100, true
- Select Operator [SEL_20] (rows=1 width=193)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_19] (rows=1 width=185)
- predicate:false
- TableScan [TS_0] (rows=10 width=185)
- default@table1_n10,table1_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"]
- <-Map 3 [SIMPLE_EDGE] vectorized, llap
- SHUFFLE [RS_25]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_24] (rows=1 width=8)
- Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_23] (rows=1 width=8)
- Output:["_col0","_col1"]
- Filter Operator [FIL_22] (rows=1 width=8)
- predicate:false
- TableScan [TS_3] (rows=5 width=3)
- default@table3_n0,table3_n0,Tbl:COMPLETE,Col:COMPLETE
+ limit:0
PREHOOK: query: select table1_n10.id, table1_n10.val, table1_n10.val1 from table1_n10 left semi join table3_n0 on table1_n10.dimid = table3_n0.id and table3_n0.id = 100 where table1_n10.dimid = 200
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/dec_str.q.out b/ql/src/test/results/clientpositive/llap/dec_str.q.out
index ce509a7..b0b441e 100644
--- a/ql/src/test/results/clientpositive/llap/dec_str.q.out
+++ b/ql/src/test/results/clientpositive/llap/dec_str.q.out
@@ -98,16 +98,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: t1
- Filter Operator
- predicate: false (type: boolean)
- Select Operator
- expressions: a (type: decimal(3,1))
- outputColumnNames: _col0
- ListSink
+ ListSink
PREHOOK: query: explain select * from t1 where a = 'ab'
PREHOOK: type: QUERY
@@ -123,14 +116,7 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: t1
- Filter Operator
- predicate: false (type: boolean)
- Select Operator
- expressions: a (type: decimal(3,1))
- outputColumnNames: _col0
- ListSink
+ ListSink
diff --git a/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out b/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out
index 1c233d0..ff4a9e3 100644
--- a/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out
+++ b/ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out
@@ -2256,7 +2256,7 @@ FROM "STORE_SALES"
WHERE "ss_customer_sk" IS NOT NULL AND "ss_sold_date_sk" IS NOT NULL) AS "t0"
INNER JOIN (SELECT "d_date_sk"
FROM "DATE_DIM"
-WHERE "d_year" = 1999 AND "d_moy" BETWEEN 1 AND 3 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."ss_sold_date_sk" = "t2"."d_date_sk"
+WHERE "d_moy" BETWEEN 1 AND 3 AND "d_year" = 1999 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."ss_sold_date_sk" = "t2"."d_date_sk"
hive.sql.query.fieldNames ss_customer_sk0
hive.sql.query.fieldTypes int
hive.sql.query.split false
@@ -2289,7 +2289,7 @@ FROM "WEB_SALES"
WHERE "ws_bill_customer_sk" IS NOT NULL AND "ws_sold_date_sk" IS NOT NULL) AS "t0"
INNER JOIN (SELECT "d_date_sk"
FROM "DATE_DIM"
-WHERE "d_year" = 1999 AND "d_moy" BETWEEN 1 AND 3 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."ws_sold_date_sk" = "t2"."d_date_sk"
+WHERE "d_moy" BETWEEN 1 AND 3 AND "d_year" = 1999 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."ws_sold_date_sk" = "t2"."d_date_sk"
GROUP BY "t0"."ws_bill_customer_sk"
hive.sql.query.fieldNames literalTrue,ws_bill_customer_sk0
hive.sql.query.fieldTypes boolean,int
@@ -2318,7 +2318,7 @@ FROM "CATALOG_SALES"
WHERE "cs_ship_customer_sk" IS NOT NULL AND "cs_sold_date_sk" IS NOT NULL) AS "t0"
INNER JOIN (SELECT "d_date_sk"
FROM "DATE_DIM"
-WHERE "d_year" = 1999 AND "d_moy" BETWEEN 1 AND 3 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."cs_sold_date_sk" = "t2"."d_date_sk"
+WHERE "d_moy" BETWEEN 1 AND 3 AND "d_year" = 1999 AND "d_date_sk" IS NOT NULL) AS "t2" ON "t0"."cs_sold_date_sk" = "t2"."d_date_sk"
GROUP BY "t0"."cs_ship_customer_sk"
hive.sql.query.fieldNames literalTrue,cs_ship_customer_sk0
hive.sql.query.fieldTypes boolean,int
diff --git a/ql/src/test/results/clientpositive/llap/filter_union.q.out b/ql/src/test/results/clientpositive/llap/filter_union.q.out
index ef94551..3a968ca 100644
--- a/ql/src/test/results/clientpositive/llap/filter_union.q.out
+++ b/ql/src/test/results/clientpositive/llap/filter_union.q.out
@@ -524,22 +524,28 @@ STAGE PLANS:
TableScan
alias: src
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(key)
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +
keys: key (type: string)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 0
+ Group By Operator
+ aggregations: count(key)
+ keys: key (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -550,18 +556,21 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), 4 (type: int)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
index a5f1dde..7266888 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out
@@ -26,7 +26,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_n1
POSTHOOK: Lineage: tbl_n1.n SCRIPT []
POSTHOOK: Lineage: tbl_n1.t SCRIPT []
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain
select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -90,17 +90,24 @@ STAGE PLANS:
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Filter Operator
+ predicate: (_col0 = 1L) (type: boolean)
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: string)
+ Select Operator
+ expressions: 1L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -110,7 +117,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
@@ -120,7 +127,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_n1
#### A masked pattern was here ####
1 one true true
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 2' is a cross product
PREHOOK: query: explain
select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -148,17 +155,24 @@ STAGE PLANS:
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Filter Operator
+ predicate: (_col0 = 2L) (type: boolean)
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: string)
+ Select Operator
+ expressions: 2L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 2
@@ -207,7 +221,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 2' is a cross product
PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
index 1a7fc91..41c6e68 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
@@ -449,8 +449,8 @@ POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT []
POSTHOOK: Lineage: part_null_n1.p_retailprice SCRIPT []
POSTHOOK: Lineage: part_null_n1.p_size SCRIPT []
POSTHOOK: Lineage: part_null_n1.p_type SCRIPT []
-Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null_n1 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -471,7 +471,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE)
- Reducer 3 <- Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
+ Reducer 3 <- Map 6 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -480,12 +480,12 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
@@ -497,25 +497,44 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_null_n1
- Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: p_partkey
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(0)
- minReductionHashAggr: 0.0
+ aggregations: count(p_partkey)
+ minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: part_null_n1
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -528,10 +547,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 3
Execution mode: llap
@@ -543,10 +562,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -558,12 +577,12 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -571,8 +590,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Map 1' is a cross product
-Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
+Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -592,7 +611,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
+ Map 1 <- Map 4 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -601,12 +620,12 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
@@ -617,7 +636,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
input vertices:
1 Reducer 3
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -626,11 +645,11 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
input vertices:
- 1 Map 2
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 Map 4
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -641,25 +660,44 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_null_n1
- Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: p_partkey
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(0)
- minReductionHashAggr: 0.0
+ aggregations: count(p_partkey)
+ minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part_null_n1
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 3
@@ -669,12 +707,12 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
index 782fcb1..0db232a 100644
--- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out
@@ -4046,29 +4046,34 @@ POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
0 val_0 2008-04-08 NULL NULL NULL
NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a right outer join (select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a right outer join (select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b join tab_part_n10 c on a.key = b.key and b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
+PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
@@ -4076,16 +4081,19 @@ full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b join tab_part_n10 c on a.key = b.key and b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
+POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key join tab_part_n10 c on b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -4095,6 +4103,7 @@ full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key join tab_part_n10 c on b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -4630,13 +4639,15 @@ NULL NULL NULL NULL NULL NULL 97 val_97 2008-04-08
NULL NULL NULL NULL NULL NULL 97 val_97 2008-04-08
NULL NULL NULL NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key full outer join tab_part_n10 c on b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -4646,6 +4657,7 @@ join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key full outer join tab_part_n10 c on b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/multi_in_clause.q.out b/ql/src/test/results/clientpositive/llap/multi_in_clause.q.out
index f3fefc4..985c2a4 100644
--- a/ql/src/test/results/clientpositive/llap/multi_in_clause.q.out
+++ b/ql/src/test/results/clientpositive/llap/multi_in_clause.q.out
@@ -26,8 +26,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@very_simple_table_for_in_test
#### A masked pattern was here ####
CBO PLAN:
-HiveProject(name=[$0])
- HiveFilter(condition=[false])
+HiveSortLimit(fetch=[0])
+ HiveProject(name=[$0])
HiveTableScan(table=[[default, very_simple_table_for_in_test]], table:alias=[very_simple_table_for_in_test])
PREHOOK: query: select * from very_simple_table_for_in_test where name IN('g','r') AND name IN('a','b')
diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out
index c33b7bb..b056b9b 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out
@@ -3277,7 +3277,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveProject(p_partkey=[$0])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(p_partkey=[$0], p_name=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
@@ -3339,7 +3339,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveSemiJoin(condition=[=($0, $1)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(p_partkey=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
diff --git a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out
index cfb1243..7db69f8 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out
@@ -406,8 +406,8 @@ POSTHOOK: Input: default@part
POSTHOOK: Input: default@part_null_n0
#### A masked pattern was here ####
26
-Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null
PREHOOK: type: QUERY
PREHOOK: Input: default@part
diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
index 2c01b01..8ac0531 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
@@ -88,7 +88,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveSemiJoin(condition=[=($0, $1)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(p_partkey=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, part]], table:alias=[part])
@@ -320,8 +320,8 @@ POSTHOOK: Input: default@part
POSTHOOK: Input: default@part_null_n0
#### A masked pattern was here ####
26
-Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
-Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product
+Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product
PREHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null
PREHOOK: type: QUERY
PREHOOK: Input: default@part
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index e3b3f73..3c042f6 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -1024,8 +1024,8 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(0)
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
index 67540b1..b4c9656 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -1777,7 +1777,7 @@ HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4],
HiveTableScan(table=[[default, part_null]], table:alias=[part_null])
HiveProject(p_type=[$0], c=[$1], ck=[$2])
HiveAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($2)])
- HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[semi])
HiveProject(p_brand=[$3], p_type=[$4], p_container=[$6])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))])
HiveTableScan(table=[[default, part]], table:alias=[part])
@@ -1786,7 +1786,7 @@ HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4],
HiveTableScan(table=[[default, part]], table:alias=[pp])
HiveProject(p_container=[$1], literalTrue=[true], p_type=[$0])
HiveAggregate(group=[{1, 2}])
- HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[semi])
HiveProject(p_brand=[$3], p_type=[$4], p_container=[$6])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($6))])
HiveTableScan(table=[[default, part]], table:alias=[part])
diff --git a/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out b/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out
index 6de5362..7c44365 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_null_agg.q.out
@@ -6,8 +6,8 @@ POSTHOOK: query: CREATE TABLE table_7 (int_col INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@table_7
-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain
SELECT
(t1.int_col) * (t1.int_col) AS int_col
@@ -74,9 +74,9 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
@@ -96,16 +96,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: true (type: boolean)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
value expressions: _col0 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -119,10 +119,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col1
- Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col1 (type: boolean)
Reducer 3
Execution mode: llap
@@ -134,21 +134,21 @@ STAGE PLANS:
0
1
outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col2 (type: bigint), _col3 (type: bigint), _col1 (type: boolean)
outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Filter Operator
predicate: ((_col1 = 0L) or (_col3 is null and (_col2 >= _col1))) (type: boolean)
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: null (type: void)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 888e2fa..6aecf7e 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -370,8 +370,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -392,7 +392,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE)
- Reducer 3 <- Map 4 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
+ Reducer 3 <- Map 6 (XPROD_EDGE), Reducer 2 (XPROD_EDGE)
Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
@@ -401,12 +401,12 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
@@ -418,25 +418,44 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: part_null_n0
- Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: p_partkey
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(0)
- minReductionHashAggr: 0.0
+ aggregations: count(p_partkey)
+ minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: part_null_n0
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
@@ -449,10 +468,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Reducer 3
Execution mode: llap
@@ -464,10 +483,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -479,12 +498,12 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -492,8 +511,8 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: select * from part where p_name = (select p_name from part_null_n0 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
diff --git a/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out b/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out
index d998339..e69a469 100644
--- a/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_assertion_type.q.out
@@ -115,21 +115,76 @@ POSTHOOK: Input: default@union_table_test_n0
POSTHOOK: Input: default@union_table_test_n1
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: union_table_test_n1
+ Statistics: Num rows: 4 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: column1 (type: string), column2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '5' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: union_table_test_n0
+ Statistics: Num rows: 4 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: column1 (type: string), column2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), '5' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Union 2
+ Vertex: Union 2
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: union_table_test_n0
- Filter Operator
- predicate: false (type: boolean)
- Select Operator
- expressions: column1 (type: string), column2 (type: string), '5' (type: string)
- outputColumnNames: _col0, _col1, _col2
- ListSink
+ ListSink
PREHOOK: query: SELECT column1, x.column2, x.column3 FROM (
SELECT column1, column2, '5' as column3 FROM union_table_test_n1
@@ -290,8 +345,8 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
diff --git a/ql/src/test/results/clientpositive/mapjoin2.q.out b/ql/src/test/results/clientpositive/mapjoin2.q.out
index e4cd76d..3ef5b0e 100644
--- a/ql/src/test/results/clientpositive/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin2.q.out
@@ -26,7 +26,7 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tbl_n1
POSTHOOK: Lineage: tbl_n1.n SCRIPT []
POSTHOOK: Lineage: tbl_n1.t SCRIPT []
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: explain
select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -46,25 +46,32 @@ STAGE PLANS:
Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_1:tbl_n1
+ $hdt$_1:$hdt$_1:$hdt$_1:tbl_n1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_1:tbl_n1
+ $hdt$_1:$hdt$_1:$hdt$_1:tbl_n1
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- keys:
- 0
- 1
+ Filter Operator
+ predicate: (_col0 = 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 1L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ HashTable Sink Operator
+ keys:
+ 0
+ 1
Stage: Stage-3
Map Reduce
@@ -109,7 +116,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
@@ -119,7 +126,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_n1
#### A masked pattern was here ####
1 one true true
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: explain
select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
@@ -139,28 +146,35 @@ STAGE PLANS:
Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:tbl_n1
+ $hdt$_0:$hdt$_0:$hdt$_0:tbl_n1
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:tbl_n1
+ $hdt$_0:$hdt$_0:$hdt$_0:tbl_n1
TableScan
alias: tbl_n1
Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: n (type: bigint), t (type: string)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: n (type: bigint), t (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- HashTable Sink Operator
- filter predicates:
- 0
- 1 {true}
- keys:
- 0
- 1
+ Filter Operator
+ predicate: (_col0 = 2L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 2L (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ HashTable Sink Operator
+ filter predicates:
+ 0
+ 1 {true}
+ keys:
+ 0
+ 1
Stage: Stage-3
Map Reduce
@@ -208,7 +222,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Stage-3:MAPRED' is a cross product
PREHOOK: query: select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_n1
diff --git a/ql/src/test/results/clientpositive/masking_10.q.out b/ql/src/test/results/clientpositive/masking_10.q.out
index 3f3871b..bc68a08 100644
--- a/ql/src/test/results/clientpositive/masking_10.q.out
+++ b/ql/src/test/results/clientpositive/masking_10.q.out
@@ -87,7 +87,7 @@ POSTHOOK: Input: default@masking_test
2017 2_lav
2017 4_lav
2017 8_lav
-Warning: Shuffle Join JOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: explain
select * from
masking_test alias01
@@ -117,12 +117,13 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@masking_test
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -133,27 +134,64 @@ STAGE PLANS:
predicate: ((key < 10) and ((key % 2) = 0)) (type: boolean)
Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: int), reverse(value) (type: string)
- outputColumnNames: _col0, _col1
+ expressions: reverse(value) (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Group By Operator
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 22372 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: 2017 (type: int), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
TableScan
alias: masking_test
- Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: ((key < 10) and ((key % 2) = 0)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((key < 10) and ((key % 2) = 0)) (type: boolean)
+ Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 2017 (type: int), reverse(value) (type: string)
+ expressions: key (type: int), reverse(value) (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 44744 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
Reduce Operator Tree:
Join Operator
condition map:
@@ -162,7 +200,7 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 88744 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -178,7 +216,7 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 250 Data size: 88744 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string)
TableScan
alias: masking_test
@@ -205,10 +243,10 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 275 Data size: 97618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 49218 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 275 Data size: 97618 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 275 Data size: 49218 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out
index baf9079..6f4ab79 100644
--- a/ql/src/test/results/clientpositive/mergejoin.q.out
+++ b/ql/src/test/results/clientpositive/mergejoin.q.out
@@ -3362,29 +3362,34 @@ POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
0 val_0 2008-04-08 NULL NULL NULL
NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a right outer join (select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select * from (select * from tab_n9 where tab_n9.key = 0)a right outer join (select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b join tab_part_n10 c on a.key = b.key and b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
+PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
@@ -3392,16 +3397,19 @@ full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b join tab_part_n10 c on a.key = b.key and b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
+POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key join tab_part_n10 c on b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -3411,6 +3419,7 @@ full outer join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key join tab_part_n10 c on b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -3946,13 +3955,15 @@ NULL NULL NULL NULL NULL NULL 97 val_97 2008-04-08
NULL NULL NULL NULL NULL NULL 97 val_97 2008-04-08
NULL NULL NULL NULL NULL NULL 98 val_98 2008-04-08
NULL NULL NULL NULL NULL NULL 98 val_98 2008-04-08
-Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select * from
(select * from tab_n9 where tab_n9.key = 0)a
join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key full outer join tab_part_n10 c on b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n9
+PREHOOK: Input: default@tab_n9@ds=2008-04-08
PREHOOK: Input: default@tab_part_n10
PREHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
@@ -3962,6 +3973,7 @@ join
(select * from tab_part_n10 where tab_part_n10.key = 98)b on a.key = b.key full outer join tab_part_n10 c on b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n9
+POSTHOOK: Input: default@tab_n9@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n10
POSTHOOK: Input: default@tab_part_n10@ds=2008-04-08
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/optimize_filter_literal.q.out b/ql/src/test/results/clientpositive/optimize_filter_literal.q.out
index 4ef0f51..7542d76 100644
--- a/ql/src/test/results/clientpositive/optimize_filter_literal.q.out
+++ b/ql/src/test/results/clientpositive/optimize_filter_literal.q.out
@@ -136,14 +136,17 @@ POSTHOOK: Input: default@tab_n14@ds=2008-04-08
POSTHOOK: Output: default@tab_n14
POSTHOOK: Output: default@tab_n14@ds=2008-04-08
#### A masked pattern was here ####
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select * from
(select * from tab_n14 where tab_n14.key = 0)a
full outer join
(select * from tab_part_n13 where tab_part_n13.key = 98)b join tab_part_n13 c on a.key = b.key and b.key = c.key
PREHOOK: type: QUERY
PREHOOK: Input: default@tab_n14
+PREHOOK: Input: default@tab_n14@ds=2008-04-08
PREHOOK: Input: default@tab_part_n13
+PREHOOK: Input: default@tab_part_n13@ds=2008-04-08
#### A masked pattern was here ####
POSTHOOK: query: select * from
(select * from tab_n14 where tab_n14.key = 0)a
@@ -151,5 +154,7 @@ full outer join
(select * from tab_part_n13 where tab_part_n13.key = 98)b join tab_part_n13 c on a.key = b.key and b.key = c.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tab_n14
+POSTHOOK: Input: default@tab_n14@ds=2008-04-08
POSTHOOK: Input: default@tab_part_n13
+POSTHOOK: Input: default@tab_part_n13@ds=2008-04-08
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/partition_boolexpr.q.out b/ql/src/test/results/clientpositive/partition_boolexpr.q.out
index 15198e5..cfb845b 100644
--- a/ql/src/test/results/clientpositive/partition_boolexpr.q.out
+++ b/ql/src/test/results/clientpositive/partition_boolexpr.q.out
@@ -70,8 +70,8 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 37248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
@@ -187,8 +187,8 @@ STAGE PLANS:
Statistics: Num rows: 2000 Data size: 37248 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
index 9df533b..e076022 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query10.q.out
@@ -137,7 +137,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=
HiveFilter(condition=[OR(IS NOT NULL($14), IS NOT NULL($16))])
HiveJoin(condition=[=($0, $17)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $15)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $14)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $14)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
index 9167a9c..e9308cd 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out
@@ -231,7 +231,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -328,7 +328,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
@@ -425,7 +425,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[
HiveFilter(condition=[IS NOT NULL($3)])
HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()])
HiveProject(i_brand_id=[$1], i_class_id=[$2], i_category_id=[$3], $f3=[*(CAST($6):DECIMAL(10, 0), $7)])
- HiveSemiJoin(condition=[=($5, $9)], joinType=[inner])
+ HiveSemiJoin(condition=[=($5, $9)], joinType=[semi])
HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11])
HiveFilter(condition=[IS NOT NULL($0)])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
index 8126e43..1bfd9d8 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out
@@ -74,7 +74,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index bf8a275..0ad4660 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[$0])
HiveUnion(all=[true])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($2, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($2, $7)], joinType=[semi])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject($f1=[$0])
HiveAggregate(group=[{1}])
@@ -176,7 +176,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($3, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($3, $7)], joinType=[semi])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject($f1=[$0])
HiveAggregate(group=[{1}])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
index dcfc53b..0eb3f70 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query35.q.out
@@ -134,7 +134,7 @@ HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o
HiveFilter(condition=[OR(IS NOT NULL($11), IS NOT NULL($13))])
HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
index f0cca27..4e617f9 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query69.q.out
@@ -115,7 +115,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], literalTrue=[$11], ws_bill_customer_sk0=[$12])
HiveFilter(condition=[IS NULL($11)])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
index 5ee3bfc..4741932 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query83.q.out
@@ -164,7 +164,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -188,7 +188,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -212,7 +212,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
index 5cf486f..b8521f4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out
@@ -70,7 +70,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
index 2427cbd..8633ba2 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out
@@ -137,7 +137,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=
HiveFilter(condition=[OR(IS NOT NULL($14), IS NOT NULL($16))])
HiveJoin(condition=[=($0, $17)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $15)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $14)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $14)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
index 13c6477..b40ca94 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out
@@ -74,7 +74,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
index ca7c3b3..bae58bd 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out
@@ -121,7 +121,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(sales=[$0])
HiveUnion(all=[true])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($3, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($3, $7)], joinType=[semi])
HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0])
HiveAggregate(group=[{0}])
@@ -167,7 +167,7 @@ HiveAggregate(group=[{}], agg#0=[sum($0)])
HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)])
HiveTableScan(table=[[default, item]], table:alias=[item])
HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)])
- HiveSemiJoin(condition=[=($2, $7)], joinType=[inner])
+ HiveSemiJoin(condition=[=($2, $7)], joinType=[semi])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(ss_customer_sk=[$0])
HiveAggregate(group=[{0}])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
index d3d6658..2de6818 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query35.q.out
@@ -134,7 +134,7 @@ HiveProject(ca_state=[$0], cd_gender=[$1], cd_marital_status=[$2], cnt1=[$3], _o
HiveFilter(condition=[OR(IS NOT NULL($11), IS NOT NULL($13))])
HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$9], ca_state=[$10], cd_demo_sk=[$3], cd_gender=[$4], cd_marital_status=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8])
HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
index 5b739ce..8cb93b4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query69.q.out
@@ -115,7 +115,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], dir0=[
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], ca_address_sk=[$3], ca_state=[$4], cd_demo_sk=[$5], cd_gender=[$6], cd_marital_status=[$7], cd_education_status=[$8], cd_purchase_estimate=[$9], cd_credit_rating=[$10], literalTrue=[$11], ws_bill_customer_sk0=[$12])
HiveFilter(condition=[IS NULL($11)])
HiveJoin(condition=[=($0, $12)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[=($0, $11)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $11)], joinType=[semi])
HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
index 6c3404d..fe05a6e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out
@@ -160,7 +160,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -182,7 +182,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
@@ -204,7 +204,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(d_date=[$0])
HiveAggregate(group=[{0}])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(d_date=[$2], d_week_seq=[$4])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
index 8efda1e..ef108a6 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
@@ -70,7 +70,7 @@ CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
HiveFilter(condition=[IS NULL($13)])
HiveJoin(condition=[=($4, $14)], joinType=[left], algorithm=[none], cost=[not available])
- HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[semi])
HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12])
HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
diff --git a/ql/src/test/results/clientpositive/plan_json.q.out b/ql/src/test/results/clientpositive/plan_json.q.out
index c447283..65e2220 100644
--- a/ql/src/test/results/clientpositive/plan_json.q.out
+++ b/ql/src/test/results/clientpositive/plan_json.q.out
@@ -6,4 +6,4 @@ POSTHOOK: query: EXPLAIN FORMATTED SELECT count(1) FROM src
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \" [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"src\"\n ],\n \"table:alias\": \"src\",\n \"inputs\": [],\n \"rowCount\": 500.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \" [...]
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
index a3c4a74..a128626 100644
--- a/ql/src/test/results/clientpositive/pointlookup.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -352,7 +352,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@inoutputopt
#### A masked pattern was here ####
11
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select orOutput.key, inOutput.key
from orOutput full outer join inOutput on (orOutput.key = inOutput.key)
where orOutput.key = null
@@ -369,7 +369,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@inoutput
POSTHOOK: Input: default@oroutput
#### A masked pattern was here ####
-Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
PREHOOK: query: select orOutput.key, inOutputOpt.key
from orOutput full outer join inOutputOpt on (orOutput.key = inOutputOpt.key)
where orOutput.key = null
diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out
index 37fd83f..65a37de 100644
--- a/ql/src/test/results/clientpositive/ppd_join5.q.out
+++ b/ql/src/test/results/clientpositive/ppd_join5.q.out
@@ -158,7 +158,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: explain
select * from (
select a.*,b.d d1,c.d d2 from
@@ -190,94 +190,99 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
+ filterExpr: (id1 is not null and id2 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
+ predicate: (id1 is not null and id2 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: id1 (type: string), id2 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
TableScan
- alias: c
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: b
+ filterExpr: ((d <= 1) and id is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: ((d <= 1) and id is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: d (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: id (type: string), d (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
+ key expressions: _col0 (type: string), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ sort order:
Statistics: Num rows: 1 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: int)
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
TableScan
- alias: b
- Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: c
+ filterExpr: (d <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (d <= 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: id (type: string), d (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: d (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string), _col0 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
- Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: int)
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string), _col1 (type: string)
- 1 _col0 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -285,7 +290,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product
PREHOOK: query: select * from (
select a.*,b.d d1,c.d d2 from
t1_n79 a join t2_n48 b on (a.id1 = b.id)
diff --git a/ql/src/test/results/clientpositive/ppd_udf_col.q.out b/ql/src/test/results/clientpositive/ppd_udf_col.q.out
index 7346ecf..f959d9d 100644
--- a/ql/src/test/results/clientpositive/ppd_udf_col.q.out
+++ b/ql/src/test/results/clientpositive/ppd_udf_col.q.out
@@ -68,35 +68,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), rand() (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -124,21 +101,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src
+ filterExpr: (UDFToDouble(key) = 100.0D) (type: boolean)
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), rand() (type: double), '4' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rand() (type: double), '4' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Stage: Stage-0
@@ -265,35 +250,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string), rand() (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -321,21 +283,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src
+ filterExpr: (UDFToDouble(key) = 100.0D) (type: boolean)
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (UDFToDouble(key) = 100.0D) (type: boolean)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string), rand() (type: double), '4' (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), rand() (type: double), '4' (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
Stage: Stage-0
diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
index 3f7db2a..48b8841 100644
--- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
+++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
@@ -96,35 +96,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -162,35 +139,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -245,35 +199,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -353,35 +284,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -629,35 +537,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -747,35 +632,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t_n7
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: t_n7
- Statistics: Num rows: 2 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: s (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -788,35 +650,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@loc_orc_n0
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: loc_orc_n0
- Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out b/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
index 8f0ba6c..3a5953f 100644
--- a/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
+++ b/ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out
@@ -845,16 +845,9 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
- TableScan
- alias: cbo_t2
- Filter Operator
- predicate: false (type: boolean)
- Select Operator
- expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- ListSink
+ ListSink
PREHOOK: query: -- rewrite to NULL
EXPLAIN select * from cbo_t2 where (cbo_t2.c_int) IN (cbo_t2.c_int, 2*cbo_t2.c_int)
diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
index 3e5cc1f..6b118a0 100644
--- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out
@@ -466,82 +466,12 @@ POSTHOOK: Input: default@table1_n10
POSTHOOK: Input: default@table3_n0
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: table1_n10
- Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: id (type: int), val (type: string), val1 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 100 (type: int), true (type: boolean)
- sort order: ++
- Map-reduce partition columns: 100 (type: int), true (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- Execution mode: vectorized
- Map 3
- Map Operator Tree:
- TableScan
- alias: table3_n0
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 100 (type: int), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: boolean)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: boolean)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
- Reducer 2
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 100 (type: int), true (type: boolean)
- 1 _col0 (type: int), _col1 (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
@@ -668,82 +598,12 @@ POSTHOOK: Input: default@table1_n10
POSTHOOK: Input: default@table3_n0
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Spark
- Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: table1_n10
- Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: id (type: int), val (type: string), val1 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: 100 (type: int), true (type: boolean)
- sort order: ++
- Map-reduce partition columns: 100 (type: int), true (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- Execution mode: vectorized
- Map 3
- Map Operator Tree:
- TableScan
- alias: table3_n0
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 100 (type: int), true (type: boolean)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: boolean)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: boolean)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: vectorized
- Reducer 2
- Reduce Operator Tree:
- Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 100 (type: int), true (type: boolean)
- 1 _col0 (type: int), _col1 (type: boolean)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 0
Processor Tree:
ListSink
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
index 4a1cf43..995f49e 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
@@ -161,7 +161,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain
select * from (
select a.*,b.d d1,c.d d2 from
@@ -190,62 +190,66 @@ STAGE PLANS:
Stage: Stage-1
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
- Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
+ filterExpr: (id1 is not null and id2 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: false (type: boolean)
+ predicate: (id1 is not null and id2 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id1 (type: string), id2 (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: vectorized
Map 4
Map Operator Tree:
TableScan
- alias: c
+ alias: b
+ filterExpr: ((d <= 1) and id is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: false (type: boolean)
+ predicate: ((d <= 1) and id is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: d (type: int)
- outputColumnNames: _col0
+ expressions: id (type: string), d (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- sort order:
+ key expressions: _col0 (type: string), _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
+ value expressions: _col1 (type: int)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
- alias: b
+ alias: c
+ filterExpr: (d <= 1) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: false (type: boolean)
+ predicate: (d <= 1) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: id (type: string), d (type: int)
- outputColumnNames: _col0, _col1
+ expressions: d (type: int)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string), _col0 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
+ sort order:
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
+ value expressions: _col0 (type: int)
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -253,37 +257,38 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: int)
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
Reducer 3
Reduce Operator Tree:
Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string), _col1 (type: string)
- 1 _col0 (type: string), _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col4
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -291,7 +296,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product
PREHOOK: query: select * from (
select a.*,b.d d1,c.d d2 from
t1_n79 a join t2_n48 b on (a.id1 = b.id)
diff --git a/ql/src/test/results/clientpositive/spark/semijoin.q.out b/ql/src/test/results/clientpositive/spark/semijoin.q.out
index c42332d..b6d6559 100644
--- a/ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -2955,7 +2955,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveProject(p_partkey=[$0])
- HiveSemiJoin(condition=[=($1, $2)], joinType=[inner])
+ HiveSemiJoin(condition=[=($1, $2)], joinType=[semi])
HiveProject(p_partkey=[$0], p_name=[$1])
HiveFilter(condition=[IS NOT NULL($1)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
@@ -3017,7 +3017,7 @@ POSTHOOK: Input: default@part
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{}], agg#0=[count()])
- HiveSemiJoin(condition=[=($0, $1)], joinType=[inner])
+ HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(p_partkey=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, part]], table:alias=[pp])
diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
index c4c8448..308d84c 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out
@@ -1010,8 +1010,8 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(0)
diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
index ad95d2f..de87168 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out
@@ -1725,7 +1725,7 @@ HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4],
HiveTableScan(table=[[default, part_null]], table:alias=[part_null])
HiveProject(p_type=[$0], c=[$1], ck=[$2])
HiveAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($2)])
- HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[semi])
HiveProject(p_brand=[$3], p_type=[$4], p_container=[$6])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3))])
HiveTableScan(table=[[default, part]], table:alias=[part])
@@ -1734,7 +1734,7 @@ HiveProject(p_partkey=[$0], p_name=[$1], p_mfgr=[$2], p_brand=[$3], p_type=[$4],
HiveTableScan(table=[[default, part]], table:alias=[pp])
HiveProject(p_container=[$1], literalTrue=[true], p_type=[$0])
HiveAggregate(group=[{1, 2}])
- HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[inner])
+ HiveSemiJoin(condition=[AND(=($1, $4), =($0, $3))], joinType=[semi])
HiveProject(p_brand=[$3], p_type=[$4], p_container=[$6])
HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($6))])
HiveTableScan(table=[[default, part]], table:alias=[part])
diff --git a/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
index 0c3a81d..39dbbf8 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_null_agg.q.out
@@ -6,8 +6,8 @@ POSTHOOK: query: CREATE TABLE table_7 (int_col INT)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@table_7
-Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
-Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
PREHOOK: query: explain
SELECT
(t1.int_col) * (t1.int_col) AS int_col
@@ -81,16 +81,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: true (type: boolean)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
value expressions: _col0 (type: boolean)
Execution mode: vectorized
Map 5
@@ -100,9 +100,9 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
@@ -123,10 +123,10 @@ STAGE PLANS:
0
1
outputColumnNames: _col1
- Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col1 (type: boolean)
Reducer 3
Reduce Operator Tree:
@@ -137,21 +137,21 @@ STAGE PLANS:
0
1
outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: _col2 (type: bigint), _col3 (type: bigint), _col1 (type: boolean)
outputColumnNames: _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Filter Operator
predicate: ((_col1 = 0L) or (_col3 is null and (_col2 >= _col1))) (type: boolean)
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
expressions: null (type: void)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 10 Basic stats: PARTIAL Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
index ff801a2..850b4f1 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out
@@ -349,7 +349,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -377,52 +377,59 @@ STAGE PLANS:
TableScan
alias: part
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
Execution mode: vectorized
Map 3
Map Operator Tree:
TableScan
alias: part_null_n0
- Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: p_partkey (type: int)
+ outputColumnNames: p_partkey
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(0)
+ aggregations: count(p_partkey)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized
Map 5
Map Operator Tree:
TableScan
alias: part_null_n0
- Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ filterExpr: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: p_name is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Execution mode: vectorized
Reducer 2
Reduce Operator Tree:
@@ -435,10 +442,10 @@ STAGE PLANS:
1
2
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 134 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 134 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -450,12 +457,12 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -463,7 +470,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
+Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product
PREHOOK: query: select * from part where p_name = (select p_name from part_null_n0 where p_name is null)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out
index 9a65531..4665ec2 100644
--- a/ql/src/test/results/clientpositive/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/subquery_exists.q.out
@@ -971,8 +971,8 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: false (type: boolean)
+ Limit
+ Number of rows: 0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(0)
diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
index 0640aee..43d9ddf 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -248,7 +248,7 @@ left outer join small_alltypesorc_a_n1 hd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a_n1
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n } [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n } [...]
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a_n1 c
left outer join small_alltypesorc_a_n1 cd
@@ -292,7 +292,7 @@ left outer join small_alltypesorc_a_n1 hd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a_n1
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 14.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 14.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n [...]
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a_n1 c
left outer join small_alltypesorc_a_n1 cd
@@ -336,7 +336,7 @@ left outer join small_alltypesorc_a_n1 hd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a_n1
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 26.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 26.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n [...]
PREHOOK: query: select count(*) from (select c.cstring1
from small_alltypesorc_a_n1 c
left outer join small_alltypesorc_a_n1 cd
diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
index d4bf60a..6686575 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out
@@ -262,7 +262,7 @@ left outer join small_alltypesorc_b cd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_b
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\" [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\" [...]
PREHOOK: query: select *
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
@@ -347,7 +347,7 @@ left outer join small_alltypesorc_b hd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_b
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n [...]
PREHOOK: query: select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b hd
@@ -794,7 +794,7 @@ left outer join small_alltypesorc_b hd
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_b
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n [...]
PREHOOK: query: select count(*) from (select c.ctinyint
from small_alltypesorc_b c
left outer join small_alltypesorc_b cd
diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out
index 174773d..0725e77 100644
--- a/ql/src/test/results/clientpositive/vector_outer_join6.q.out
+++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out
@@ -138,7 +138,7 @@ POSTHOOK: Input: default@tjoin1_n0
POSTHOOK: Input: default@tjoin2_n0
POSTHOOK: Input: default@tjoin3
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"tjoin1_n0\"\n ],\n \"table:alias\": \"tjoin1_n0\",\n \"inputs\": [],\n \"rowCount\": 3.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"rnum\"\n },\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"tjoin1_n0\"\n ],\n \"table:alias\": \"tjoin1_n0\",\n \"inputs\": [],\n \"rowCount\": 3.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"rnum\"\n },\n [...]
PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
PREHOOK: type: QUERY
@@ -173,7 +173,7 @@ POSTHOOK: Input: default@tjoin1_n0
POSTHOOK: Input: default@tjoin2_n0
POSTHOOK: Input: default@tjoin3
#### A masked pattern was here ####
-{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"tjoin1_n0\"\n ],\n \"table:alias\": \"tjoin1_n0\",\n \"inputs\": [],\n \"rowCount\": 3.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"rnum\"\n },\n [...]
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"tjoin1_n0\"\n ],\n \"table:alias\": \"tjoin1_n0\",\n \"inputs\": [],\n \"rowCount\": 3.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"rnum\"\n },\n [...]
PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1_n0.rnum tj1rnum, tjoin2_n0.rnum tj2rnum, tjoin2_n0.c1 tj2c1 from tjoin1_n0 left outer join tjoin2_n0 on tjoin1_n0.c1 = tjoin2_n0.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1
PREHOOK: type: QUERY