You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/07/09 10:28:41 UTC
hive git commit: HIVE-14176: CBO nesting windowing function within
each other when merging Project operators (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 7a91bbfc2 -> 050616112
HIVE-14176: CBO nesting windowing function within each other when merging Project operators (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/05061611
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/05061611
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/05061611
Branch: refs/heads/master
Commit: 0506161124c16b8c950e9914fac8f1b5a77d309c
Parents: 7a91bbf
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Jul 7 00:23:33 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Jul 9 11:27:18 2016 +0100
----------------------------------------------------------------------
.../calcite/rules/HiveProjectMergeRule.java | 46 ++++++++++++++++++--
.../hadoop/hive/ql/parse/CalcitePlanner.java | 13 ++----
.../clientpositive/windowing_duplicate.q | 13 ++++++
.../clientpositive/windowing_duplicate.q.out | 39 +++++++++++++++++
4 files changed, 98 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/05061611/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
index fc48a26..e963546 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectMergeRule.java
@@ -17,13 +17,53 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.rules.ProjectMergeRule;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+/**
+ * ProjectMergeRule merges a {@link org.apache.calcite.rel.core.Project} into
+ * another {@link org.apache.calcite.rel.core.Project},
+ * provided the projects aren't projecting identical sets of input references.
+ */
public class HiveProjectMergeRule extends ProjectMergeRule {
- public static final HiveProjectMergeRule INSTANCE = new HiveProjectMergeRule();
- public HiveProjectMergeRule() {
- super(true, HiveRelFactories.HIVE_BUILDER);
+ public static final HiveProjectMergeRule INSTANCE =
+ new HiveProjectMergeRule(true, HiveRelFactories.HIVE_BUILDER);
+
+ public static final HiveProjectMergeRule INSTANCE_NO_FORCE =
+ new HiveProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER);
+
+
+ private HiveProjectMergeRule(boolean force, RelBuilderFactory relBuilderFactory) {
+ super(force, relBuilderFactory);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ // Currently we do not support merging windowing functions with other
+ // windowing functions i.e. embedding windowing functions within each
+ // other
+ final Project topProject = call.rel(0);
+ final Project bottomProject = call.rel(1);
+ for (RexNode expr : topProject.getChildExps()) {
+ if (expr instanceof RexOver) {
+ Set<Integer> positions = HiveCalciteUtil.getInputRefs(expr);
+ for (int pos : positions) {
+ if (bottomProject.getChildExps().get(pos) instanceof RexOver) {
+ return false;
+ }
+ }
+ }
+ }
+ return super.matches(call);
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/05061611/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 6c57d3e..1e44ccf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1016,10 +1016,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 4. Run other optimizations that do not need stats
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null,
- HepMatchOrder.BOTTOM_UP,
- ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE,
- new ProjectMergeRule(false, HiveRelFactories.HIVE_PROJECT_FACTORY),
- HiveAggregateProjectMergeRule.INSTANCE, HiveJoinCommuteRule.INSTANCE);
+ HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE,
+ HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveAggregateProjectMergeRule.INSTANCE,
+ HiveJoinCommuteRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats");
// 5. Run aggregate-join transpose (cost based)
@@ -1119,8 +1118,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
// TODO: Decorelation of subquery should be done before attempting
// Partition Pruning; otherwise Expression evaluation may try to execute
// corelated sub query.
-
- LOG.info("Jesus - Plan0: " + RelOptUtil.toString(basePlan));
PerfLogger perfLogger = SessionState.getPerfLogger();
@@ -1151,8 +1148,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF");
- LOG.info("Jesus - Plan2: " + RelOptUtil.toString(basePlan));
-
// 3. Run exhaustive PPD, add not null filters, transitive inference,
// constant propagation, constant folding
List<RelOptRule> rules = Lists.newArrayList();
@@ -1191,8 +1186,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
- LOG.info("Jesus - Plan3: " + RelOptUtil.toString(basePlan));
-
// 4. Push down limit through outer join
// NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
http://git-wip-us.apache.org/repos/asf/hive/blob/05061611/ql/src/test/queries/clientpositive/windowing_duplicate.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_duplicate.q b/ql/src/test/queries/clientpositive/windowing_duplicate.q
new file mode 100644
index 0000000..ebdecd7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/windowing_duplicate.q
@@ -0,0 +1,13 @@
+create table mytable1 (
+ mytime timestamp,
+ string1 string);
+
+create table t1 as
+select
+ sum(bound3) OVER (PARTITION BY string1 ORDER BY mytime) as bound1
+from (
+ select
+ string1, mytime,
+ lag(mytime) over (partition by string1 order by mytime) as bound3
+ from mytable1
+) sub;
http://git-wip-us.apache.org/repos/asf/hive/blob/05061611/ql/src/test/results/clientpositive/windowing_duplicate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_duplicate.q.out b/ql/src/test/results/clientpositive/windowing_duplicate.q.out
new file mode 100644
index 0000000..c7b6d4f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/windowing_duplicate.q.out
@@ -0,0 +1,39 @@
+PREHOOK: query: create table mytable1 (
+ mytime timestamp,
+ string1 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mytable1
+POSTHOOK: query: create table mytable1 (
+ mytime timestamp,
+ string1 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mytable1
+PREHOOK: query: create table t1 as
+select
+ sum(bound3) OVER (PARTITION BY string1 ORDER BY mytime) as bound1
+from (
+ select
+ string1, mytime,
+ lag(mytime) over (partition by string1 order by mytime) as bound3
+ from mytable1
+) sub
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@mytable1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 as
+select
+ sum(bound3) OVER (PARTITION BY string1 ORDER BY mytime) as bound1
+from (
+ select
+ string1, mytime,
+ lag(mytime) over (partition by string1 order by mytime) as bound3
+ from mytable1
+) sub
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@mytable1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.bound1 SCRIPT [(mytable1)mytable1.FieldSchema(name:mytime, type:timestamp, comment:null), (mytable1)mytable1.FieldSchema(name:string1, type:string, comment:null), ]