You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/02/04 16:52:48 UTC
[hive] branch master updated: HIVE-21184: Add explain and explain
formatted CBO plan with cost information (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 02a688d HIVE-21184: Add explain and explain formatted CBO plan with cost information (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
02a688d is described below
commit 02a688d78a39bf5628351e52d95e9c0c69344927
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Fri Feb 1 14:09:03 2019 -0800
HIVE-21184: Add explain and explain formatted CBO plan with cost information (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Close apache/hive#525
---
.../test/resources/testconfiguration.properties | 1 +
.../org/apache/hadoop/hive/ql/QOutProcessor.java | 4 +
ql/src/java/org/apache/hadoop/hive/ql/Driver.java | 3 +-
.../java/org/apache/hadoop/hive/ql/QueryPlan.java | 9 +
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 27 +--
.../org/apache/hadoop/hive/ql/hooks/ATSHook.java | 2 +-
.../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java | 2 +-
.../hive/ql/optimizer/calcite/HiveRelOptUtil.java | 17 +-
.../ql/optimizer/calcite/HiveRelWriterImpl.java | 69 ++++++++
.../optimizer/calcite/reloperators/HiveJoin.java | 4 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 12 +-
.../hadoop/hive/ql/parse/ExplainConfiguration.java | 19 ++-
.../hive/ql/parse/ExplainSemanticAnalyzer.java | 10 +-
.../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 2 +
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 2 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 4 +-
.../ql/parse/TestUpdateDeleteSemanticAnalyzer.java | 2 +-
.../queries/clientpositive/perf/cbo_ext_query1.q | 51 ++++++
.../clientpositive/perf/tez/cbo_ext_query1.q.out | 182 +++++++++++++++++++++
.../perf/tez/constraints/cbo_ext_query1.q.out | 180 ++++++++++++++++++++
20 files changed, 568 insertions(+), 34 deletions(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 8ba0ddf..a237745 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1747,6 +1747,7 @@ tez.perf.disabled.query.files=mv_query44.q
spark.perf.disabled.query.files=query14.q,\
query64.q,\
cbo_query1.q,\
+ cbo_ext_query1.q,\
cbo_query10.q,\
cbo_query11.q,\
cbo_query12.q,\
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
index 254cc95..b87d904 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
@@ -287,6 +287,10 @@ public class QOutProcessor {
ppm.add(new PatternReplacementPair(Pattern.compile("task_[0-9_]+"), "task_#ID#"));
ppm.add(new PatternReplacementPair(Pattern.compile("for Spark session.*?:"),
"#SPARK_SESSION_ID#:"));
+
+ ppm.add(new PatternReplacementPair(Pattern.compile("rowcount = [0-9]+(\\.[0-9]+(E[0-9]+)?)?, cumulative cost = \\{.*\\}, id = [0-9]*"),
+ "rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###"));
+
partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
}
/* This list may be modified by specific cli drivers to mask strings that change on every test */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 0dd50de..d622ce0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -692,7 +692,8 @@ public class Driver implements IDriver {
schema = getSchema(sem, conf);
plan = new QueryPlan(queryStr, sem, queryDisplay.getQueryStartTime(), queryId,
queryState.getHiveOperation(), schema);
- // save the optimized sql for the explain
+ // save the optimized plan and sql for the explain
+ plan.setOptimizedCBOPlan(ctx.getCalcitePlan());
plan.setOptimizedQueryString(ctx.getOptimizedSql());
conf.set("mapreduce.workflow.id", "hive_" + queryId);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
index ac03efe..7636019 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
@@ -75,6 +75,7 @@ public class QueryPlan implements Serializable {
private String cboInfo;
private String queryString;
+ private String optimizedCBOPlan;
private String optimizedQueryString;
private ArrayList<Task<? extends Serializable>> rootTasks;
@@ -761,6 +762,14 @@ public class QueryPlan implements Serializable {
this.optimizedQueryString = optimizedQueryString;
}
+ public String getOptimizedCBOPlan() {
+ return this.optimizedCBOPlan;
+ }
+
+ public void setOptimizedCBOPlan(String optimizedCBOPlan) {
+ this.optimizedCBOPlan = optimizedCBOPlan;
+ }
+
public org.apache.hadoop.hive.ql.plan.api.Query getQuery() {
return query;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 7c4efab..b9d1e0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -140,14 +140,19 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
return outJSONObject;
}
- public String outputCboPlan(String cboPlan, PrintStream out, boolean jsonOutput)
- throws JSONException {
- if (out != null) {
- out.println("CBO PLAN:");
- out.println(cboPlan);
+ public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) throws Exception {
+ JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
+ boolean jsonOutput = work.isFormatted();
+ String cboPlan = work.getCboPlan();
+ if (cboPlan != null) {
+ if (jsonOutput) {
+ outJSONObject.put("CBOPlan", cboPlan);
+ } else {
+ out.println("CBO PLAN:");
+ out.println(cboPlan);
+ }
}
-
- return jsonOutput ? cboPlan : null;
+ return outJSONObject;
}
public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws Exception {
@@ -236,7 +241,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
}
public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetchTask,
- boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo, String optimizedSQL) throws Exception {
+ boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
+ String optimizedSQL) throws Exception {
// If the user asked for a formatted output, dump the json output
// in the output stream
@@ -392,8 +398,9 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
out = new PrintStream(outS);
if (work.isCbo()) {
- if (work.getCboPlan() != null) {
- outputCboPlan(work.getCboPlan(), out, work.isFormatted());
+ JSONObject jsonCBOPlan = getJSONCBOPlan(out, work);
+ if (work.isFormatted()) {
+ out.print(jsonCBOPlan);
}
} else if (work.isLogical()) {
JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
index 8b10823..9cb4d8c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java
@@ -269,7 +269,7 @@ public class ATSHook implements ExecuteWithHookContext {
config, //explainConfig
null, // cboInfo
plan.getOptimizedQueryString(), // optimizedSQL
- null
+ plan.getOptimizedCBOPlan()
);
@SuppressWarnings("unchecked")
ExplainTask explain = (ExplainTask) TaskFactory.get(work);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 0a09675..3d51c07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -512,7 +512,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext {
config, // explainConfig
plan.getCboInfo(), // cboInfo,
plan.getOptimizedQueryString(),
- null
+ plan.getOptimizedCBOPlan()
);
ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf);
explain.initialize(hookContext.getQueryState(), plan, null, null);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index e99e6d3..2c2f91b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -47,7 +47,6 @@ import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rel.core.TableScan;
-import org.apache.calcite.rel.metadata.RelColumnOrigin;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
@@ -66,7 +65,6 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
-import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -1034,4 +1032,19 @@ public class HiveRelOptUtil extends RelOptUtil {
}
return null;
}
+
+ /**
+ * Converts a relational expression to a string, showing information that will aid
+ * to parse the string back.
+ */
+ public static String toJsonString(final RelNode rel) {
+ if (rel == null) {
+ return null;
+ }
+
+ final HiveRelWriterImpl planWriter = new HiveRelWriterImpl();
+ rel.explain(planWriter);
+ return planWriter.asString();
+ }
+
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
new file mode 100644
index 0000000..d4e6c25
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelWriterImpl.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.lang.reflect.Field;
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.externalize.RelJsonWriter;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Writer implementation for rel nodes that produces an output in json that is easily
+ * parseable back into rel nodes.
+ */
+public class HiveRelWriterImpl extends RelJsonWriter {
+
+ protected static final Logger LOG = LoggerFactory.getLogger(HiveRelWriterImpl.class);
+
+ //~ Constructors -------------------------------------------------------------
+
+ public HiveRelWriterImpl() {
+ super();
+ }
+
+ //~ Methods ------------------------------------------------------------------
+
+ protected void explain_(RelNode rel, List<Pair<String, Object>> values) {
+ super.explain_(rel, values);
+ // TODO: The following is hackish since we do not have visibility over relList
+ // and we do not want to bring all the writer utilities from Calcite. It should
+ // be changed once we move to new Calcite version and relList is visible for
+ // subclasses.
+ try {
+ final RelMetadataQuery mq = rel.getCluster().getMetadataQuery();
+ Field fs = RelJsonWriter.class.getDeclaredField("relList");
+ fs.setAccessible(true);
+ List<Object> relList = (List<Object>) fs.get(this);
+ Map<String, Object> map = (Map<String, Object>) relList.get(relList.size() - 1);
+ map.put("rowCount", mq.getRowCount(rel));
+ if (rel.getInputs().size() == 0) {
+ // This is a leaf, we will print the average row size and schema
+ map.put("avgRowSize", mq.getAverageRowSize(rel));
+ map.put("rowType", rel.getRowType().toString());
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to add additional fields in json writer", e);
+ }
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
index c549d8d..16f1a5c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java
@@ -220,9 +220,9 @@ public class HiveJoin extends Join implements HiveRelNode {
public RelWriter explainTerms(RelWriter pw) {
return super.explainTerms(pw)
.item("algorithm", joinAlgorithm == null ?
- "none" : joinAlgorithm)
+ "none" : joinAlgorithm.toString())
.item("cost", joinCost == null ?
- "not available" : joinCost);
+ "not available" : joinCost.toString());
}
//required for HiveRelDecorrelator
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 646ce09..47d6e7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -150,6 +150,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvide
import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -531,10 +532,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
this.ctx.setCboInfo("Plan optimized by CBO.");
this.ctx.setCboSucceeded(true);
if (this.ctx.isExplainPlan()) {
+ // Enrich explain with information derived from CBO
ExplainConfiguration explainConfig = this.ctx.getExplainConfig();
if (explainConfig.isCbo()) {
- if (explainConfig.isCboExtended()) {
- // Include join cost
+ if (!explainConfig.isCboJoinCost()) {
+ // Include cost as provided by Calcite
+ newPlan.getCluster().invalidateMetadataQuery();
+ RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+ }
+ if (explainConfig.isFormatted()) {
+ this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
+ } else if (explainConfig.isCboCost() || explainConfig.isCboJoinCost()) {
this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES));
} else {
// Do not include join cost
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 28a7b43..994ef14 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -42,7 +42,8 @@ public class ExplainConfiguration {
private boolean formatted = false;
private boolean dependency = false;
private boolean cbo = false;
- private boolean cboExtended = false;
+ private boolean cboCost = false;
+ private boolean cboJoinCost = false;
private boolean logical = false;
private boolean authorize = false;
private boolean userLevelExplain = false;
@@ -94,12 +95,20 @@ public class ExplainConfiguration {
this.cbo = cbo;
}
- public boolean isCboExtended() {
- return cboExtended;
+ public boolean isCboCost() {
+ return cboCost;
}
- public void setCboExtended(boolean cboExtended) {
- this.cboExtended = cboExtended;
+ public void setCboCost(boolean cboExtended) {
+ this.cboCost = cboExtended;
+ }
+
+ public boolean isCboJoinCost() {
+ return cboJoinCost;
+ }
+
+ public void setCboJoinCost(boolean cboJoinCost) {
+ this.cboJoinCost = cboJoinCost;
}
public boolean isLogical() {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
index 6721a37..6d7af38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
@@ -77,12 +77,10 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer {
config.setDependency(true);
} else if (explainOptions == HiveParser.KW_CBO) {
config.setCbo(true);
- if (i + 1 < childCount) {
- if (ast.getChild(i + 1).getType() == HiveParser.KW_EXTENDED) {
- config.setCboExtended(true);
- i++;
- }
- }
+ } else if (explainOptions == HiveParser.KW_COST) {
+ config.setCboCost(true);
+ } else if (explainOptions == HiveParser.KW_JOINCOST) {
+ config.setCboJoinCost(true);
} else if (explainOptions == HiveParser.KW_LOGICAL) {
config.setLogical(true);
} else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 253633c..608befc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -381,6 +381,8 @@ KW_UNMANAGED: 'UNMANAGED';
KW_APPLICATION: 'APPLICATION';
KW_SYNC: 'SYNC';
KW_AST: 'AST';
+KW_COST: 'COST';
+KW_JOINCOST: 'JOINCOST';
// Operators
// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 1ffdec0..ce13ceb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -805,7 +805,7 @@ explainOption
: KW_EXTENDED
| KW_FORMATTED
| KW_DEPENDENCY
- | KW_CBO KW_EXTENDED?
+ | KW_CBO (KW_COST | KW_JOINCOST)?
| KW_LOGICAL
| KW_AUTHORIZATION
| KW_ANALYZE
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 417955c..47be76c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -795,12 +795,12 @@ nonReserved
:
KW_ABORT | KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | KW_BEFORE | KW_BUCKET | KW_BUCKETS
| KW_CASCADE | KW_CBO | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS
- | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_DATA | KW_DAY
+ | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_COST | KW_DATA | KW_DAY
| KW_DATABASES | KW_DATETIME | KW_DBPROPERTIES | KW_DEFERRED | KW_DEFINED | KW_DELIMITED | KW_DEPENDENCY
| KW_DESC | KW_DIRECTORIES | KW_DIRECTORY | KW_DISABLE | KW_DISTRIBUTE | KW_DOW | KW_ELEM_TYPE
| KW_ENABLE | KW_ENFORCED | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT
| KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE
- | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR | KW_KILL
+ | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR | KW_JOINCOST | KW_KILL
| KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
| KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE
| KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index f449c6b..9b6827e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -300,7 +300,7 @@ public class TestUpdateDeleteSemanticAnalyzer {
ExplainConfiguration config = new ExplainConfiguration();
config.setExtended(true);
ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
- sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null);
+ sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), plan.getOptimizedCBOPlan());
ExplainTask task = new ExplainTask();
task.setWork(work);
task.initialize(queryState, plan, null, null);
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q b/ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q
new file mode 100644
index 0000000..e591f87
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/cbo_ext_query1.q
@@ -0,0 +1,51 @@
+set hive.mapred.mode=nonstrict;
+-- start query 1 in stream 0 using template query1.tpl and seed 2031708268
+explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100;
+
+explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100;
+
+-- end query 1 in stream 0 using template query1.tpl
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
new file mode 100644
index 0000000..bcd1f8d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out
@@ -0,0 +1,182 @@
+PREHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
+PREHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[{415687.382770037 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[{8.00093932086143E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{460301.9976112889 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
new file mode 100644
index 0000000..255261b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo cost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+
+PREHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo joincost
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[{415687.382770037 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[{8.00093932086143E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[{460301.9976112889 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked###
+