Posted to commits@hive.apache.org by kg...@apache.org on 2021/11/17 16:24:03 UTC

[hive] branch master updated: HIVE-25670: Avoid getTable() calls for foreign key tables not used in… (#2763) (Steve Carlin reviewed by Zoltan Haindrich)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 47ba530  HIVE-25670: Avoid getTable() calls for foreign key tables not used in… (#2763) (Steve Carlin reviewed by Zoltan Haindrich)
47ba530 is described below

commit 47ba530464c6941b4fc0f2882fd09ce1683808d6
Author: scarlin-cloudera <55...@users.noreply.github.com>
AuthorDate: Wed Nov 17 08:23:42 2021 -0800

    HIVE-25670: Avoid getTable() calls for foreign key tables not used in… (#2763) (Steve Carlin reviewed by Zoltan Haindrich)
    
    RelOptHiveTable currently fetches the Table information for all
    referential constraint tables. However, it only needs to fetch the tables
    that are used in the query.
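
In short, the patch replaces eager construction in the RelOptHiveTable
constructor with lazy initialization: the constraint list is built the first
time getReferentialConstraints() is called. A condensed sketch of the idiom as
it appears in the diff below (a boolean flag is used instead of a null check
because an empty constraint list is a legitimate result):

    private List<RelReferentialConstraint> referentialConstraints;
    private boolean fetchedReferentialConstraints;

    @Override
    public List<RelReferentialConstraint> getReferentialConstraints() {
      if (!fetchedReferentialConstraints) {
        // First access: resolve parent tables and build the constraints.
        referentialConstraints = generateReferentialConstraints();
        fetchedReferentialConstraints = true;
      }
      return referentialConstraints;
    }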
---
 .../ql/metadata/HiveMaterializedViewsRegistry.java |  5 +-
 .../hive/ql/optimizer/calcite/RelOptHiveTable.java | 47 +++++++------
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  5 +-
 .../apache/hadoop/hive/ql/parse/ParseContext.java  |  6 +-
 .../hadoop/hive/ql/parse/ParsedQueryTables.java    | 29 +++++++++
 .../apache/hadoop/hive/ql/parse/QueryTables.java   | 76 ++++++++++++++++++++++
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 10 +--
 7 files changed, 142 insertions(+), 36 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 9af4d0a..fba0a02 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.ql.parse.CBOPlan;
 import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.QueryTables;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -459,7 +460,7 @@ public final class HiveMaterializedViewsRegistry {
       // for materialized views.
       RelOptHiveTable optTable = new RelOptHiveTable(null, cluster.getTypeFactory(), fullyQualifiedTabName,
           rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(),
-          conf, null, new HashMap<>(), new HashMap<>(), new HashMap<>(), new AtomicInteger());
+          conf, null, new QueryTables(true), new HashMap<>(), new HashMap<>(), new AtomicInteger());
       DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
           dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN,
           intervals, null, null);
@@ -474,7 +475,7 @@ public final class HiveMaterializedViewsRegistry {
       // for materialized views.
       RelOptHiveTable optTable = new RelOptHiveTable(null, cluster.getTypeFactory(), fullyQualifiedTabName,
           rowType, viewTable, nonPartitionColumns, partitionColumns, new ArrayList<>(),
-          conf, null, new HashMap<>(), new HashMap<>(), new HashMap<>(), new AtomicInteger());
+          conf, null, new QueryTables(true), new HashMap<>(), new HashMap<>(), new AtomicInteger());
       tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
           viewTable.getTableName(), null, false, false);
     }
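
Both call sites above pass QueryTables(true): the materialized-view registry
never resolves foreign-key parent tables, so it hands RelOptHiveTable an empty
table map that is already frozen. The relevant constructor, as added in
QueryTables.java further down:

    public QueryTables(boolean isEmptyMap) {
      if (isEmptyMap) {
        // No parse phase here: snapshot the (empty) map immediately, so
        // getParsedTable() is callable right away and returns null.
        markParsingCompleted();
      }
    }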
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 8f9b78c..385fe9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -67,6 +67,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.parse.ColumnStatsList;
+import org.apache.hadoop.hive.ql.parse.ParsedQueryTables;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -82,6 +83,7 @@ import org.slf4j.LoggerFactory;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 public class RelOptHiveTable implements RelOptTable {
 
@@ -102,11 +104,12 @@ public class RelOptHiveTable implements RelOptTable {
   private final int                               noOfNonVirtualCols;
   private final List<ImmutableBitSet>             keys;
   private final List<ImmutableBitSet>             nonNullablekeys;
-  private final List<RelReferentialConstraint>    referentialConstraints;
+  private List<RelReferentialConstraint>          referentialConstraints;
+  private boolean                                 fetchedReferentialConstraints;
   private final HiveConf                          hiveConf;
 
   private final Hive                              db;
-  private final Map<String, Table>                tablesCache;
+  private final ParsedQueryTables                 tablesCache;
   private final Map<String, PrunedPartitionList>  partitionCache;
   private final Map<String, ColumnStatsList>      colStatsCache;
   private final AtomicInteger                     noColsMissingStats;
@@ -119,7 +122,7 @@ public class RelOptHiveTable implements RelOptTable {
 
   public RelOptHiveTable(RelOptSchema calciteSchema, RelDataTypeFactory typeFactory, List<String> qualifiedTblName,
       RelDataType rowType, Table hiveTblMetadata, List<ColumnInfo> hiveNonPartitionCols, List<ColumnInfo> hivePartitionCols,
-      List<VirtualColumn> hiveVirtualCols, HiveConf hconf, Hive db, Map<String, Table> tabNameToTabObject,
+      List<VirtualColumn> hiveVirtualCols, HiveConf hconf, Hive db, ParsedQueryTables tabNameToTabObject,
       Map<String, PrunedPartitionList> partitionCache, Map<String, ColumnStatsList> colStatsCache,
       AtomicInteger noColsMissingStats) {
     this.schema = calciteSchema;
@@ -144,7 +147,6 @@ public class RelOptHiveTable implements RelOptTable {
     Pair<List<ImmutableBitSet>, List<ImmutableBitSet>> constraintKeys = generateKeys();
     this.keys = constraintKeys.left;
     this.nonNullablekeys = constraintKeys.right;
-    this.referentialConstraints = generateReferentialConstraints();
   }
 
   //~ Methods ----------------------------------------------------------------
@@ -248,6 +250,11 @@ public class RelOptHiveTable implements RelOptTable {
     return false;
   }
 
+  public boolean hasReferentialConstraints() {
+    ForeignKeyInfo foreignKeyInfo = hiveTblMetadata.getForeignKeyInfo();
+    return foreignKeyInfo != null && !foreignKeyInfo.getForeignKeys().isEmpty();
+  }
+
   @Override
   public List<ImmutableBitSet> getKeys() {
     return keys;
@@ -255,6 +262,12 @@ public class RelOptHiveTable implements RelOptTable {
 
   @Override
   public List<RelReferentialConstraint> getReferentialConstraints() {
+    // Do a lazy load here. We only want to fetch the constraint tables that
+    // are used in the query.
+    if (!fetchedReferentialConstraints) {
+      referentialConstraints = generateReferentialConstraints();
+      fetchedReferentialConstraints = true;
+    }
     return referentialConstraints;
   }
 
@@ -333,12 +346,11 @@ public class RelOptHiveTable implements RelOptTable {
           parentTableQualifiedName.add(parentTableName);
           qualifiedName = parentTableName;
         }
-        Table parentTab = getTable(qualifiedName);
+        Table parentTab = tablesCache.getParsedTable(qualifiedName);
         if (parentTab == null) {
-          LOG.error("Table for primary key not found: "
-              + "databaseName: " + parentDatabaseName + ", "
-              + "tableName: " + parentTableName);
-          return ImmutableList.of();
+          // The parent table is not in the parse-time cache, which means
+          // it is not referenced by the query, so this constraint does
+          // not need to be tracked.
+          continue;
         }
         ImmutableList.Builder<IntPair> keys = ImmutableList.builder();
         for (ForeignKeyCol fkCol : fkCols) {
@@ -359,7 +371,7 @@ public class RelOptHiveTable implements RelOptTable {
           if (fkPos == rowType.getFieldNames().size()
               || pkPos == parentTab.getAllCols().size()) {
             LOG.error("Column for foreign key definition " + fkCol + " not found");
-            return ImmutableList.of();
+            continue;
           }
           keys.add(IntPair.of(fkPos, pkPos));
         }
@@ -370,21 +382,6 @@ public class RelOptHiveTable implements RelOptTable {
     return builder.build();
   }
 
-  private Table getTable(String tableName) {
-    if (!tablesCache.containsKey(tableName)) {
-      try {
-        Table table = db.getTable(tableName);
-        if (table != null) {
-          tablesCache.put(tableName, table);
-        }
-        return table;
-      } catch (HiveException e) {
-        throw new RuntimeException(e);
-      }
-    }
-    return tablesCache.get(tableName);
-  }
-
   @Override
   public RelNode toRel(ToRelContext context) {
     return new LogicalTableScan(context.getCluster(), this);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 2d50eb6..f88c97a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1648,6 +1648,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       try {
         calcitePlan = genLogicalPlan(getQB(), true, null, null);
+        // Freeze the table map: from this point on, consumers that only
+        // care about the tables parsed from the original query see an
+        // immutable snapshot.
+        tabNameToTabObject.markParsingCompleted();
         // if it is to create view, we do not use table alias
         resultSchema = convertRowSchemaToResultSetSchema(relToHiveRR.get(calcitePlan),
             (forViewCreation || getQB().isMaterializedView()) ? false : HiveConf.getBoolVar(conf,
@@ -3075,7 +3078,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
                   || qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
         }
 
-        if (!optTable.getReferentialConstraints().isEmpty()) {
+        if (optTable.hasReferentialConstraints()) {
           profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS);
         }
 
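
This is the call-site change that makes the lazy load pay off: the planner
previously had to build the full constraint list (triggering a metastore
getTable() for every parent table named in a foreign key) just to decide
whether to enable the REFERENTIAL_CONSTRAINTS profile. The new check only
inspects foreign-key metadata already attached to this table's own Table
object:

    public boolean hasReferentialConstraints() {
      // Cheap: no parent-table lookups, just this table's FK metadata.
      ForeignKeyInfo foreignKeyInfo = hiveTblMetadata.getForeignKeyInfo();
      return foreignKeyInfo != null && !foreignKeyInfo.getForeignKeys().isEmpty();
    }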
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 6c36375..d82369f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -97,7 +97,7 @@ public class ParseContext {
   // reducer
   private Map<String, PrunedPartitionList> prunedPartitions;
   private Map<String, ReadEntity> viewAliasToInput;
-  private Map<String, Table> tabNameToTabObject;
+  private QueryTables tabNameToTabObject;
 
   /**
    * The lineage information.
@@ -192,7 +192,7 @@ public class ParseContext {
       Context ctx, Map<String, String> idToTableNameMap, int destTableId,
       UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer,
       Map<String, PrunedPartitionList> prunedPartitions,
-      Map<String, Table> tabNameToTabObject,
+      QueryTables tabNameToTabObject,
       Map<TableScanOperator, SampleDesc> opToSamplePruner,
       GlobalLimitCtx globalLimitCtx,
       Map<String, SplitSample> nameToSplitSample,
@@ -636,7 +636,7 @@ public class ParseContext {
     this.needViewColumnAuthorization = needViewColumnAuthorization;
   }
 
-  public Map<String, Table> getTabNameToTabObject() {
+  public QueryTables getTabNameToTabObject() {
     return tabNameToTabObject;
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParsedQueryTables.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParsedQueryTables.java
new file mode 100644
index 0000000..fd973ad
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParsedQueryTables.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import org.apache.hadoop.hive.ql.metadata.Table;
+
+/**
+ * Read-only view of the tables referenced by a parsed query, keyed by
+ * table name. Implementations hand out an immutable snapshot once
+ * parsing has completed.
+ */
+public interface ParsedQueryTables {
+  Table getParsedTable(String name);
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QueryTables.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QueryTables.java
new file mode 100644
index 0000000..af0aaaf
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QueryTables.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import org.apache.hadoop.hive.ql.metadata.Table;
+
+import com.google.common.collect.ImmutableMap;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Holds all of the tables referenced by a query, keyed by table name.
+ * The map is mutable while parsing is in progress; once parsing
+ * finishes, markParsingCompleted() must be called so that callers are
+ * handed an immutable snapshot of the map. Calling getParsedTable()
+ * before parsing has been marked complete throws an exception. The
+ * boolean constructor creates a map that is empty and already marked
+ * completed.
+ */
+public class QueryTables implements ParsedQueryTables {
+  private final Map<String, Table> tableObjects = new HashMap<>();
+
+  private Map<String, Table> parsedTableObjects;
+
+  public QueryTables(boolean isEmptyMap) {
+    if (isEmptyMap) {
+      markParsingCompleted();
+    }
+  }
+
+  public QueryTables() {
+    this(false);
+  }
+
+  public boolean containsKey(String name) {
+    return tableObjects.containsKey(name);
+  }
+
+  public void put(String name, Table table) {
+    tableObjects.put(name, table);
+  }
+
+  public void markParsingCompleted() {
+    parsedTableObjects = ImmutableMap.copyOf(tableObjects);
+  }
+
+  public Table get(String name) {
+    return tableObjects.get(name);
+  }
+
+  public Table getParsedTable(String name) {
+    if (parsedTableObjects == null) {
+      throw new RuntimeException("Cannot call getParsedTable() until parsing is marked " +
+          "completed.");
+    }
+    return parsedTableObjects.get(name);
+  }
+}
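
Taken together, the two new types split the old Map<String, Table> cache in
two: a mutable view used while the query is being analyzed, and an immutable
ParsedQueryTables view for consumers such as RelOptHiveTable that must only
ever see tables actually named in the query. A sketch of the intended
lifecycle (the table names and Table variables here are illustrative, not
from the patch):

    QueryTables tables = new QueryTables();

    // Semantic analysis records each table as it is resolved.
    tables.put("db.orders", ordersTable);        // hypothetical Table objects
    tables.put("db.customers", customersTable);

    // CalcitePlanner calls this once the logical plan has been generated;
    // the snapshot handed out below can no longer change.
    tables.markParsingCompleted();

    // Constraint generation goes through the read-only interface. A parent
    // table that was never used in the query is simply absent, so the
    // lookup returns null and that foreign key is skipped.
    ParsedQueryTables parsed = tables;
    Table maybeParent = parsed.getParsedTable("db.customers");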
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index ee05acd..b6a53c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -433,7 +433,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   private WriteEntity acidAnalyzeTable;
 
   // A mapping from a tableName to a table object in metastore.
-  Map<String, Table> tabNameToTabObject;
+  QueryTables tabNameToTabObject;
 
   // The tokens we should ignore when we are trying to do table masking.
   private static final Set<Integer> IGNORED_TOKENS = Sets.newHashSet(HiveParser.TOK_GROUPBY,
@@ -500,7 +500,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     viewAliasToInput = new HashMap<String, ReadEntity>();
     mergeIsDirect = true;
     noscan = false;
-    tabNameToTabObject = new HashMap<>();
+    tabNameToTabObject = new QueryTables();
     defaultJoinMerge = !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS);
     disableJoinMerge = defaultJoinMerge;
     defaultNullOrder = NullOrdering.defaultNullOrder(conf);
@@ -2693,7 +2693,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       if (viewMask.isEnabled() && analyzeRewrite == null) {
         ParseResult parseResult = rewriteASTWithMaskAndFilter(viewMask, viewTree,
             ctx.getViewTokenRewriteStream(viewFullyQualifiedName),
-            ctx, db, tabNameToTabObject);
+            ctx, db);
         viewTree = parseResult.getTree();
       }
       SemanticDispatcher nodeOriginDispatcher = new SemanticDispatcher() {
@@ -12272,7 +12272,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   // For the replacement, we leverage the methods that are used for
   // unparseTranslator.
   private ParseResult rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast, TokenRewriteStream tokenRewriteStream,
-                                                Context ctx, Hive db, Map<String, Table> tabNameToTabObject)
+                                                Context ctx, Hive db)
       throws SemanticException {
     // 1. collect information about CTE if there is any.
     // The base table of CTE should be masked.
@@ -12561,7 +12561,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         (tableMask.isEnabled() && analyzeRewrite == null)) {
       // Here we rewrite the * and also the masking table
       ParseResult rewrittenResult = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(),
-          ctx, db, tabNameToTabObject);
+          ctx, db);
       ASTNode rewrittenAST = rewrittenResult.getTree();
       if (astForMasking != rewrittenAST) {
         usesMasking = true;