You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/07/26 21:40:33 UTC

[impala] 03/08: IMPALA-8606: Don't load table meta for GET_TABLES in local catalog mode

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ca0785ec206f27f06d8d6fd1b710779e548bbd8e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed Jul 17 01:41:36 2019 +0000

    IMPALA-8606: Don't load table meta for GET_TABLES in local catalog mode
    
    In LocalCatalog implementation, LocalDb#getTable will always return a
    completely loaded table containing all the meta of columns, partitions,
    files, etc. It's time consuming if we implement the GET_TABLES
    HiveServer2 operation based on this interface, since GET_TABLES only
    requires table names, table types and table comments, while this
    interface will trigger catalogd to fully load the table meta. It becomes
    worse when we do this for all the tables.
    
    This patch introduces a new interface, getTableIfCached, to return a
    LocalIncompleteTable object if the corresponding table is unloaded,
    which requires no round trips to the catalogd. It's used to boost the
    GET_TABLES performance in LocalCatalog mode.
    
    Tests
     - Testing in a HMS with 100 dbs and 3000 tables, without this patch it
    takes ~2mins in GET_TABLES for all tables on a cold started cluster. With
    this patch, the time reduces to ~1s.
     - Testing in HUE-4.4.0 with a db with 3000 tables, the performance is the
    same as using legacy catalog implementation.
    
    Change-Id: Ia8bbab7efdf8e629abe09d89ae3bd770e3feaccb
    Reviewed-on: http://gerrit.cloudera.org:8080/13874
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/Catalog.java    | 18 +++++++++
 fe/src/main/java/org/apache/impala/catalog/Db.java |  8 ++++
 .../java/org/apache/impala/catalog/FeCatalog.java  | 13 ++++--
 .../main/java/org/apache/impala/catalog/FeDb.java  |  6 +++
 .../apache/impala/catalog/local/LocalCatalog.java  | 16 ++++++++
 .../org/apache/impala/catalog/local/LocalDb.java   | 13 ++++--
 .../impala/catalog/local/LocalIncompleteTable.java | 47 ++++++++++++++++++++++
 .../java/org/apache/impala/service/MetadataOp.java |  8 +++-
 .../impala/catalog/local/LocalCatalogTest.java     | 31 ++++++++++++++
 .../java/org/apache/impala/service/JdbcTest.java   |  7 ++--
 tests/hs2/test_hs2.py                              |  4 +-
 11 files changed, 156 insertions(+), 15 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
index 2313d65..67441dc 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
@@ -163,6 +163,24 @@ public abstract class Catalog implements AutoCloseable {
   }
 
   /**
+   * Returns the table with the given name if it's completely loaded in the cache.
+   * Otherwise, return an IncompleteTable for it.
+   */
+  public Table getTableIfCachedNoThrow(String dbName, String tableName) {
+    // In legacy catalog implementation, this is the same behavior as getTableNoThrow.
+    return getTableNoThrow(dbName, tableName);
+  }
+
+  /**
+   * The same as getTableIfCachedNoThrow except that we'll throw an exception if database
+   * not exists.
+   */
+  public Table getTableIfCached(String dbName, String tableName)
+      throws DatabaseNotFoundException {
+    return getTable(dbName, tableName);
+  }
+
+  /**
    * Removes a table from the catalog and returns the table that was removed, or null
    * if the table/database does not exist.
    */
diff --git a/fe/src/main/java/org/apache/impala/catalog/Db.java b/fe/src/main/java/org/apache/impala/catalog/Db.java
index e36cfef..6eeca2a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Db.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Db.java
@@ -183,6 +183,14 @@ public class Db extends CatalogObjectImpl implements FeDb {
   public Table getTable(String tblName) { return tableCache_.get(tblName); }
 
   /**
+   * Returns the Table with the given name if present in the table cache or null if the
+   * table does not exist in the cache. If the table is unloaded, the result is an
+   * IncompleteTable.
+   */
+  @Override
+  public Table getTableIfCached(String tblName) { return getTable(tblName); }
+
+  /**
    * Removes the table name and any cached metadata from the Table cache.
    */
   public Table removeTable(String tableName) {
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
index e0e63c2..093ddd7 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
@@ -40,11 +40,18 @@ public interface FeCatalog {
       throws DatabaseNotFoundException;
 
   /** @see Catalog#getTable(String, String) */
-  FeTable getTable(String db_name, String table_name)
+  FeTable getTable(String dbName, String tableName)
       throws DatabaseNotFoundException;
 
   /** @see Catalog#getTableNoThrow(String, String) */
-  FeTable getTableNoThrow(String db_name, String table_name);
+  FeTable getTableNoThrow(String dbName, String tableName);
+
+  /** @see Catalog#getTableIfCached(String, String) */
+  FeTable getTableIfCached(String dbName, String tableName)
+      throws DatabaseNotFoundException;
+
+  /** @see Catalog#getTableIfCachedNoThrow(String, String) */
+  FeTable getTableIfCachedNoThrow(String dbName, String tableName);
 
   /** @see Catalog#getTCatalogObject(TCatalogObject) */
   TCatalogObject getTCatalogObject(TCatalogObject objectDesc)
@@ -55,7 +62,7 @@ public interface FeCatalog {
 
   /** @see Catalog#getHdfsPartition(String, String, List) */
   FeFsPartition getHdfsPartition(String db, String tbl,
-      List<TPartitionKeyValue> partition_spec) throws CatalogException;
+      List<TPartitionKeyValue> partitionSpec) throws CatalogException;
 
   /** @see Catalog#getDataSources(PatternMatcher) */
   List<? extends FeDataSource> getDataSources(PatternMatcher createHivePatternMatcher);
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeDb.java b/fe/src/main/java/org/apache/impala/catalog/FeDb.java
index 11e5aa1..f80dc47 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeDb.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeDb.java
@@ -62,6 +62,12 @@ public interface FeDb extends HasName {
   FeTable getTable(String tbl);
 
   /**
+   * @return the table with the given name if it's completely loaded in the cache.
+   * Otherwise, return an IncompleteTable for it.
+   */
+  FeTable getTableIfCached(String tbl);
+
+  /**
    * @return the names of the tables within this database
    */
   List<String> getAllTableNames();
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
index 2a6f438..16e6502 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
@@ -131,6 +131,22 @@ public class LocalCatalog implements FeCatalog {
   }
 
   @Override
+  public FeTable getTableIfCached(String dbName, String tableName)
+      throws DatabaseNotFoundException {
+    return getDbOrThrow(dbName).getTableIfCached(tableName);
+  }
+
+  @Override
+  public FeTable getTableIfCachedNoThrow(String dbName, String tableName) {
+    try {
+      return getTableIfCached(dbName, tableName);
+    } catch (Exception e) {
+      // pass
+    }
+    return null;
+  }
+
+  @Override
   public TCatalogObject getTCatalogObject(TCatalogObject objectDesc)
       throws CatalogException {
     // TODO(todd): this probably makes the /catalog page not load with an error.
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
index ef1107b..2dcce7a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalDb.java
@@ -103,7 +103,7 @@ class LocalDb implements FeDb {
   }
 
   @Override
-  public FeTable getTable(String tblName) {
+  public FeTable getTableIfCached(String tblName) {
     Preconditions.checkNotNull(tblName);
     tblName = tblName.toLowerCase();
     loadTableNames();
@@ -111,8 +111,13 @@ class LocalDb implements FeDb {
       // Table doesn't exist.
       return null;
     }
-    FeTable tbl = tables_.get(tblName);
-    if (tbl == null) {
+    return tables_.get(tblName);
+  }
+
+  @Override
+  public FeTable getTable(String tblName) {
+    FeTable tbl = getTableIfCached(tblName);
+    if (tbl instanceof LocalIncompleteTable) {
       // The table exists but hasn't been loaded yet.
       try{
         tbl = LocalTable.load(this, tblName);
@@ -159,7 +164,7 @@ class LocalDb implements FeDb {
     try {
       List<String> names = catalog_.getMetaProvider().loadTableNames(name_);
       for (String tableName : names) {
-        newMap.put(tableName.toLowerCase(), null);
+        newMap.put(tableName.toLowerCase(), new LocalIncompleteTable(this, tableName));
       }
     } catch (TException e) {
       throw new LocalCatalogException(String.format(
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalIncompleteTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalIncompleteTable.java
new file mode 100644
index 0000000..58d7f2a
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalIncompleteTable.java
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.impala.catalog.local;
+
+import org.apache.impala.catalog.FeIncompleteTable;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.thrift.TTableDescriptor;
+
+import java.util.Set;
+
+/**
+ * FeTable implementation which represents an unloaded table in the LocalCatalog
+ * implementation. This is used so operations that don't require the whole table meta,
+ * like GetTables HiveServer2 operation, can get a quick response.
+ */
+public class LocalIncompleteTable extends LocalTable implements FeIncompleteTable {
+
+  public LocalIncompleteTable(LocalDb db, String tblName) {
+    super(db, tblName);
+  }
+
+  @Override
+  public ImpalaException getCause() { return null; }
+
+  @Override
+  public TTableDescriptor toThriftDescriptor(int tableId,
+      Set<Long> referencedPartitions) {
+    throw new RuntimeException("Not serializable as descriptor");
+  }
+
+  @Override
+  public boolean isLoaded() { return false; }
+}
diff --git a/fe/src/main/java/org/apache/impala/service/MetadataOp.java b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
index 349ee9f..b82d1b5 100644
--- a/fe/src/main/java/org/apache/impala/service/MetadataOp.java
+++ b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
@@ -303,7 +303,13 @@ public class MetadataOp {
         List<String> tableComments = Lists.newArrayList();
         List<String> tableTypes = Lists.newArrayList();
         for (String tabName: fe.getTableNames(db.getName(), tablePatternMatcher, user)) {
-          FeTable table = catalog.getTableNoThrow(db.getName(), tabName);
+          FeTable table;
+          if (columnPatternMatcher == PatternMatcher.MATCHER_MATCH_NONE) {
+            // Don't need to completely load the table meta if columns are not required.
+            table = catalog.getTableIfCachedNoThrow(db.getName(), tabName);
+          } else {
+            table = catalog.getTableNoThrow(db.getName(), tabName);
+          }
           if (table == null) {
             result.missingTbls.add(new TableName(db.getName(), tabName));
             continue;
diff --git a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
index 3fccc91..059a4b0 100644
--- a/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/local/LocalCatalogTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.*;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.hive.service.rpc.thrift.TGetColumnsReq;
 import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
 import org.apache.hive.service.rpc.thrift.TGetTablesReq;
 import org.apache.impala.analysis.Expr;
@@ -40,6 +41,7 @@ import org.apache.impala.catalog.Type;
 import org.apache.impala.service.BackendConfig;
 import org.apache.impala.service.FeSupport;
 import org.apache.impala.service.Frontend;
+import org.apache.impala.service.MetadataOp;
 import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TMetadataOpcode;
@@ -374,4 +376,33 @@ public class LocalCatalogTest {
     assertEquals(resp.rows.get(0).colVals.get(1).string_val, "functional");
     assertEquals(resp.rows.get(0).colVals.get(2).string_val, "bad_serde");
   }
+
+  /**
+   * Test GET_TABLES request won't trigger metadata loading for targeted tables.
+   */
+  @Test
+  public void testGetTableIfCached() throws Exception {
+    FeTable tbl = catalog_.getTableIfCachedNoThrow("functional", "alltypes");
+    assertTrue(tbl instanceof LocalIncompleteTable);
+
+    TMetadataOpRequest req = new TMetadataOpRequest();
+    req.opcode = TMetadataOpcode.GET_TABLES;
+    req.get_tables_req = new TGetTablesReq();
+    fe_.execHiveServer2MetadataOp(req);
+
+    // It's still a LocalIncompleteTable since GET_TABLES don't trigger metadata loading.
+    tbl = catalog_.getDb("functional").getTableIfCached("alltypes");
+    assertTrue(tbl instanceof LocalIncompleteTable);
+
+    // GET_COLUMNS request should trigger metadata loading for the targeted table.
+    req = new TMetadataOpRequest();
+    req.opcode = TMetadataOpcode.GET_COLUMNS;
+    req.get_columns_req = new TGetColumnsReq();
+    req.get_columns_req.setSchemaName("functional");
+    req.get_columns_req.setTableName("alltypes");
+    fe_.execHiveServer2MetadataOp(req);
+    // Table should be loaded
+    tbl = catalog_.getDb("functional").getTableIfCached("alltypes");
+    assertTrue(tbl instanceof LocalFsTable);
+  }
 }
diff --git a/fe/src/test/java/org/apache/impala/service/JdbcTest.java b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
index 2f4d1db..0c91de6 100644
--- a/fe/src/test/java/org/apache/impala/service/JdbcTest.java
+++ b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
@@ -398,10 +398,9 @@ public class JdbcTest extends JdbcTestBase {
         rs.getString("TABLE_NAME"));
 
     String remarks = rs.getString("REMARKS");
-    // IMPALA-7587: with catalog V2, if a table is not yet loaded before
-    // getTables(), then the 'remarks' field is left empty. getColumns()
-    // loads the table metadata, so later getTables() calls will return
-    // 'remarks' correctly.
+    // getTables() won't trigger full meta loading for tables. So for unloaded tables,
+    // the 'remarks' field is left empty. getColumns() loads the table metadata, so later
+    // getTables() calls will return 'remarks' correctly.
     assertTrue("Incorrect table comment: " + remarks,
         remarks.equals("") || remarks.equals("table comment"));
 
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 0442250..0294b13 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -473,9 +473,7 @@ class TestHS2(HS2TestSuite):
         assert table_schema == "default"
         assert table_name == table
         assert table_type == "TABLE"
-        if (i == 0 and
-              not ImpalaTestClusterProperties.get_instance().is_catalog_v2_cluster()):
-          # IMPALA-7587: comments not returned for non-loaded tables with legacy catalog.
+        if i == 0:
           assert table_remarks == ""
         else:
           assert table_remarks == "table comment"