You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/04 19:22:53 UTC

[hive] branch master updated: HIVE-18920: CBO: Initialize the Janino providers ahead of 1st query (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new de61102  HIVE-18920: CBO: Initialize the Janino providers ahead of 1st query (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
de61102 is described below

commit de61102e0c5ef6fd093a5aef97df0f8d2427d39e
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Thu Feb 28 14:06:48 2019 -0800

    HIVE-18920: CBO: Initialize the Janino providers ahead of 1st query (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
---
 .../java/org/apache/hadoop/hive/cli/CliDriver.java |   3 +
 .../calcite/HiveDefaultRelMetadataProvider.java    | 171 +++++++++++++++++----
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  29 +++-
 .../apache/hive/service/server/HiveServer2.java    |   4 +
 4 files changed, 166 insertions(+), 41 deletions(-)

diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index 8e6b01b..8eb5c01 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper;
 import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper;
 import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.processors.CommandProcessor;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory;
@@ -773,6 +774,8 @@ public class CliDriver {
 
     ss.updateThreadName();
 
+    // Initialize metadata provider class
+    CalcitePlanner.initializeMetadataProviderClass();
     // Create views registry
     HiveMaterializedViewsRegistry.get().init();
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 41c2f9e..0a27142 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -17,14 +17,43 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import java.util.List;
+import org.apache.calcite.adapter.druid.DruidQuery;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
+import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.plan.volcano.AbstractConverter;
+import org.apache.calcite.plan.volcano.RelSubset;
+import org.apache.calcite.rel.AbstractRelNode;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.convert.ConverterImpl;
 import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
-import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
 import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
 import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
@@ -41,45 +70,121 @@ import com.google.common.collect.ImmutableList;
 
 public class HiveDefaultRelMetadataProvider {
 
-  private final HiveConf hiveConf;
+  /**
+   * The default metadata provider can be instantiated statically since
+   * it does not depend on any user-specified parameter (HiveConf).
+   */
+  private static final JaninoRelMetadataProvider DEFAULT =
+      JaninoRelMetadataProvider.of(
+          ChainedRelMetadataProvider.of(
+              ImmutableList.of(
+                  HiveRelMdDistinctRowCount.SOURCE,
+                  new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(),
+                  HiveRelMdSelectivity.SOURCE,
+                  HiveRelMdRowCount.SOURCE,
+                  HiveRelMdUniqueKeys.SOURCE,
+                  HiveRelMdColumnUniqueness.SOURCE,
+                  HiveRelMdSize.SOURCE,
+                  HiveRelMdMemory.SOURCE,
+                  HiveRelMdDistribution.SOURCE,
+                  HiveRelMdCollation.SOURCE,
+                  HiveRelMdPredicates.SOURCE,
+                  JaninoRelMetadataProvider.DEFAULT)));
+
+  /**
+   * This is the list of operators that are specifically used in Hive and
+   * should be loaded by the metadata providers.
+   */
+  private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES =
+      ImmutableList.of(
+          RelNode.class,
+          AbstractRelNode.class,
+          RelSubset.class,
+          HepRelVertex.class,
+          ConverterImpl.class,
+          AbstractConverter.class,
+
+          HiveTableScan.class,
+          HiveAggregate.class,
+          HiveExcept.class,
+          HiveFilter.class,
+          HiveIntersect.class,
+          HiveJoin.class,
+          HiveMultiJoin.class,
+          HiveProject.class,
+          HiveRelNode.class,
+          HiveSemiJoin.class,
+          HiveSortExchange.class,
+          HiveSortLimit.class,
+          HiveTableFunctionScan.class,
+          HiveUnion.class,
+
+          DruidQuery.class,
+
+          HiveJdbcConverter.class,
+          JdbcHiveTableScan.class,
+          JdbcAggregate.class,
+          JdbcFilter.class,
+          JdbcJoin.class,
+          JdbcProject.class,
+          JdbcSort.class,
+          JdbcUnion.class);
+
+  private final RelMetadataProvider metadataProvider;
 
 
   public HiveDefaultRelMetadataProvider(HiveConf hiveConf) {
-    this.hiveConf = hiveConf;
+    this.metadataProvider = init(hiveConf);
   }
 
-  public RelMetadataProvider getMetadataProvider() {
-
+  private RelMetadataProvider init(HiveConf hiveConf) {
     // Create cost metadata provider
-    final HiveCostModel cm;
-    if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
-            && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
-      cm = HiveOnTezCostModel.getCostModel(hiveConf);
-    } else {
-      cm = HiveDefaultCostModel.getCostModel();
+    if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
+        && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) {
+      // Get max split size for HiveRelMdParallelism
+      final Double maxSplitSize = (double) HiveConf.getLongVar(
+          hiveConf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
+
+      // Create and return metadata provider
+      JaninoRelMetadataProvider metadataProvider = JaninoRelMetadataProvider.of(
+          ChainedRelMetadataProvider.of(
+              ImmutableList.of(
+                  HiveRelMdDistinctRowCount.SOURCE,
+                  new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(),
+                  HiveRelMdSelectivity.SOURCE,
+                  HiveRelMdRowCount.SOURCE,
+                  HiveRelMdUniqueKeys.SOURCE,
+                  HiveRelMdColumnUniqueness.SOURCE,
+                  HiveRelMdSize.SOURCE,
+                  HiveRelMdMemory.SOURCE,
+                  new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
+                  HiveRelMdDistribution.SOURCE,
+                  HiveRelMdCollation.SOURCE,
+                  HiveRelMdPredicates.SOURCE,
+                  JaninoRelMetadataProvider.DEFAULT)));
+
+      metadataProvider.register(HIVE_REL_NODE_CLASSES);
+
+      return metadataProvider;
     }
 
-    // Get max split size for HiveRelMdParallelism
-    final Double maxSplitSize = (double) HiveConf.getLongVar(
-            this.hiveConf,
-            HiveConf.ConfVars.MAPREDMAXSPLITSIZE);
-
-    // Return MD provider
-    return ChainedRelMetadataProvider.of(ImmutableList
-            .of(
-                    HiveRelMdDistinctRowCount.SOURCE,
-                    new HiveRelMdCost(cm).getMetadataProvider(),
-                    HiveRelMdSelectivity.SOURCE,
-                    HiveRelMdRowCount.SOURCE,
-                    HiveRelMdUniqueKeys.SOURCE,
-                    HiveRelMdColumnUniqueness.SOURCE,
-                    HiveRelMdSize.SOURCE,
-                    HiveRelMdMemory.SOURCE,
-                    new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(),
-                    HiveRelMdDistribution.SOURCE,
-                    HiveRelMdCollation.SOURCE,
-                    HiveRelMdPredicates.SOURCE,
-                    DefaultRelMetadataProvider.INSTANCE));
+    return DEFAULT;
   }
 
+  public RelMetadataProvider getMetadataProvider() {
+    return metadataProvider;
+  }
+
+  /**
+   * This method can be called at startup time to pre-register all the
+   * additional Hive classes (compared to Calcite core classes) that may
+   * be visited during the planning phase.
+   */
+  public static void initializeMetadataProviderClass() {
+    // This will register the classes in the default Janino implementation
+    JaninoRelMetadataProvider.DEFAULT.register(
+        HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+    // This will register the classes in the default Hive implementation
+    DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES);
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f5a1c74..de88783 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -537,7 +537,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
                 if (!explainConfig.isCboJoinCost()) {
                   // Include cost as provided by Calcite
                   newPlan.getCluster().invalidateMetadataQuery();
-                  RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+                  RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
                 }
                 if (explainConfig.isFormatted()) {
                   this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan));
@@ -1768,17 +1768,21 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // Create and set MD provider
       HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
-      RelMetadataQuery.THREAD_PROVIDERS.set(
-              JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
+      RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
 
       //Remove subquery
-      LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+      }
       calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
               new HiveSubQueryRemoveRule(conf));
-      LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
-
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+      }
       calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
-      LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+      }
 
       // Validate query materialization for query results caching. This check needs
       // to occur before constant folding, which may remove some function calls
@@ -2243,7 +2247,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
         // Use Calcite cost model for view rewriting
         optCluster.invalidateMetadataQuery();
-        RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
+        RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT);
 
         // Add materializations to planner
         for (RelOptMaterialization materialization : materializations) {
@@ -5148,6 +5152,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
   }
 
+  /**
+   * This method can be called at startup time to pre-register all the
+   * additional Hive classes (compared to Calcite core classes) that may
+   * be visited during the planning phase.
+   */
+  public static void initializeMetadataProviderClass() {
+    HiveDefaultRelMetadataProvider.initializeMetadataProviderClass();
+  }
+
   private enum TableType {
     DRUID,
     NATIVE,
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index f9fb854..452b8d8 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.events.NotificationEventPoll;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
 import org.apache.hadoop.hive.ql.plan.mapper.StatsSources;
 import org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider;
 import org.apache.hadoop.hive.ql.security.authorization.PolicyProviderContainer;
@@ -241,6 +242,9 @@ public class HiveServer2 extends CompositeService {
       LlapRegistryService.getClient(hiveConf);
     }
 
+    // Initialize metadata provider class
+    CalcitePlanner.initializeMetadataProviderClass();
+
     try {
       sessionHive = Hive.get(hiveConf);
     } catch (HiveException e) {