You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/04 19:22:53 UTC
[hive] branch master updated: HIVE-18920: CBO: Initialize the
Janino providers ahead of 1st query (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new de61102 HIVE-18920: CBO: Initialize the Janino providers ahead of 1st query (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
de61102 is described below
commit de61102e0c5ef6fd093a5aef97df0f8d2427d39e
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Thu Feb 28 14:06:48 2019 -0800
HIVE-18920: CBO: Initialize the Janino providers ahead of 1st query (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
---
.../java/org/apache/hadoop/hive/cli/CliDriver.java | 3 +
.../calcite/HiveDefaultRelMetadataProvider.java | 171 +++++++++++++++++----
.../hadoop/hive/ql/parse/CalcitePlanner.java | 29 +++-
.../apache/hive/service/server/HiveServer2.java | 4 +
4 files changed, 166 insertions(+), 41 deletions(-)
diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index 8e6b01b..8eb5c01 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper;
import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper;
import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
@@ -773,6 +774,8 @@ public class CliDriver {
ss.updateThreadName();
+ // Initialize metadata provider class
+ CalcitePlanner.initializeMetadataProviderClass();
// Create views registry
HiveMaterializedViewsRegistry.get().init();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 41c2f9e..0a27142 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -17,14 +17,43 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
+import java.util.List;
+import org.apache.calcite.adapter.druid.DruidQuery;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.AbstractConverter;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.AbstractRelNode;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.convert.ConverterImpl;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
-import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
@@ -41,45 +70,121 @@ import com.google.common.collect.ImmutableList;
public class HiveDefaultRelMetadataProvider {
- private final HiveConf hiveConf;
+ /**
+ * The default metadata provider can be instantiated statically because
+ * it does not need any parameter specified by the user (HiveConf).
+ */
+ private static final JaninoRelMetadataProvider DEFAULT =
+ JaninoRelMetadataProvider.of(
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ HiveRelMdDistinctRowCount.SOURCE,
+ new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(),
+ HiveRelMdSelectivity.SOURCE,
+ HiveRelMdRowCount.SOURCE,
+ HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdColumnUniqueness.SOURCE,
+ HiveRelMdSize.SOURCE,
+ HiveRelMdMemory.SOURCE,
+ HiveRelMdDistribution.SOURCE,
+ HiveRelMdCollation.SOURCE,
+ HiveRelMdPredicates.SOURCE,
+ JaninoRelMetadataProvider.DEFAULT)));
+
+ /**
+ * This is the list of operators that are specifically used in Hive and
+ * should be loaded by the metadata providers.
+ */
+ private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES =
+ ImmutableList.of(
+ RelNode.class,
+ AbstractRelNode.class,
+ RelSubset.class,
+ HepRelVertex.class,
+ ConverterImpl.class,
+ AbstractConverter.class,
+
+ HiveTableScan.class,
+ HiveAggregate.class,
+ HiveExcept.class,
+ HiveFilter.class,
+ HiveIntersect.class,
+ HiveJoin.class,
+ HiveMultiJoin.class,
+ HiveProject.class,
+ HiveRelNode.class,
+ HiveSemiJoin.class,
+ HiveSortExchange.class,
+ HiveSortLimit.class,
+ HiveTableFunctionScan.class,
+ HiveUnion.class,
+
+ DruidQuery.class,
+
+ HiveJdbcConverter.class,
+ JdbcHiveTableScan.class,
+ JdbcAggregate.class,
+ JdbcFilter.class,
+ JdbcJoin.class,
+ JdbcProject.class,
+ JdbcSort.class,
+ JdbcUnion.class);
+
+ private final RelMetadataProvider metadataProvider;
public HiveDefaultRelMetadataProvider(HiveConf hiveConf) {
- this.hiveConf = hiveConf;
+ this.metadataProvider = init(hiveConf);
}
- public RelMetadataProvider getMetadataProvider() {
-
+ private RelMetadataProvider init(HiveConf hiveConf) {
// Create cost metadata provider
- final HiveCostModel cm;
- if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
- && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
- cm = HiveOnTezCostModel.getCostModel(hiveConf);
- } else {
- cm = HiveDefaultCostModel.getCostModel();
+ if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
+ && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
+ // Get max split size for HiveRelMdParallelism
+ final Double maxSplitSize = (double) HiveConf.getLongVar(
+ hiveConf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
+
+ // Create and return metadata provider
+ JaninoRelMetadataProvider metadataProvider = JaninoRelMetadataProvider.of(
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ HiveRelMdDistinctRowCount.SOURCE,
+ new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(),
+ HiveRelMdSelectivity.SOURCE,
+ HiveRelMdRowCount.SOURCE,
+ HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdColumnUniqueness.SOURCE,
+ HiveRelMdSize.SOURCE,
+ HiveRelMdMemory.SOURCE,
+ new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
+ HiveRelMdDistribution.SOURCE,
+ HiveRelMdCollation.SOURCE,
+ HiveRelMdPredicates.SOURCE,
+ JaninoRelMetadataProvider.DEFAULT)));
+
+ metadataProvider.register(HIVE_REL_NODE_CLASSES);
+
+ return metadataProvider;
}
- // Get max split size for HiveRelMdParallelism
- final Double maxSplitSize = (double) HiveConf.getLongVar(
- this.hiveConf,
- HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
-
- // Return MD provider
- return ChainedRelMetadataProvider.of(ImmutableList
- .of(
- HiveRelMdDistinctRowCount.SOURCE,
- new HiveRelMdCost(cm).getMetadataProvider(),
- HiveRelMdSelectivity.SOURCE,
- HiveRelMdRowCount.SOURCE,
- HiveRelMdUniqueKeys.SOURCE,
- HiveRelMdColumnUniqueness.SOURCE,
- HiveRelMdSize.SOURCE,
- HiveRelMdMemory.SOURCE,
- new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
- HiveRelMdDistribution.SOURCE,
- HiveRelMdCollation.SOURCE,
- HiveRelMdPredicates.SOURCE,
- DefaultRelMetadataProvider.INSTANCE));
+ return DEFAULT;
}
+ public RelMetadataProvider getMetadataProvider() {
+ return metadataProvider;
+ }
+
+ /**
+ * This method can be called at startup time to pre-register all the
+ * additional Hive classes (compared to Calcite core classes) that may
+ * be visited during the planning phase.
+ */
+ public static void initializeMetadataProviderClass() {
+ // This will register the classes in the default Janino implementation
+ JaninoRelMetadataProvider.DEFAULT.register(
+ HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+ // This will register the classes in the default Hive implementation
+ DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f5a1c74..de88783 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -537,7 +537,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (!explainConfig.isCboJoinCost()) {
// Include cost as provided by Calcite
newPlan.getCluster().invalidateMetadataQuery();
- RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+ RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
}
if (explainConfig.isFormatted()) {
this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
@@ -1768,17 +1768,21 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Create and set MD provider
HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
- RelMetadataQuery.THREAD_PROVIDERS.set(
- JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
+ RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
//Remove subquery
- LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+ }
calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
new HiveSubQueryRemoveRule(conf));
- LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
-
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+ }
calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
- LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+ }
// Validate query materialization for query results caching. This check needs
// to occur before constant folding, which may remove some function calls
@@ -2243,7 +2247,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Use Calcite cost model for view rewriting
optCluster.invalidateMetadataQuery();
- RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+ RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
// Add materializations to planner
for (RelOptMaterialization materialization : materializations) {
@@ -5148,6 +5152,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
+ /**
+ * This method can be called at startup time to pre-register all the
+ * additional Hive classes (compared to Calcite core classes) that may
+ * be visited during the planning phase.
+ */
+ public static void initializeMetadataProviderClass() {
+ HiveDefaultRelMetadataProvider.initializeMetadataProviderClass();
+ }
+
private enum TableType {
DRUID,
NATIVE,
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index f9fb854..452b8d8 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.events.NotificationEventPoll;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSources;
import org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.PolicyProviderContainer;
@@ -241,6 +242,9 @@ public class HiveServer2 extends CompositeService {
LlapRegistryService.getClient(hiveConf);
}
+ // Initialize metadata provider class
+ CalcitePlanner.initializeMetadataProviderClass();
+
try {
sessionHive = Hive.get(hiveConf);
} catch (HiveException e) {