Posted to commits@hive.apache.org by su...@apache.org on 2015/07/20 22:12:49 UTC

[39/50] [abbrv] hive git commit: HIVE-11255 - get_table_objects_by_name() in HiveMetaStore.java needs to retrieve table objects in multiple batches (Aihua Xu, reviewed by Chao Sun)

HIVE-11255 - get_table_objects_by_name() in HiveMetaStore.java needs to retrieve table objects in multiple batches (Aihua Xu, reviewed by Chao Sun)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/178b8d17
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/178b8d17
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/178b8d17

Branch: refs/heads/spark
Commit: 178b8d17fcaa5293dbe75eff5d39871a47f51c81
Parents: 854950b
Author: Aihua Xu <ai...@gmail.com>
Authored: Thu Jul 16 15:03:40 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Thu Jul 16 15:03:40 2015 -0700

----------------------------------------------------------------------
 .../hive/metastore/TestHiveMetaStore.java       | 57 +++++++++++++++++++-
 .../hadoop/hive/metastore/HiveMetaStore.java    | 48 +++++++++++++----
 2 files changed, 93 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
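
The gist of the change: instead of handing the full list of table names to RawStore.getTableObjectsByName() in one call, the server now de-duplicates the names and fetches them in chunks of at most METASTORE_BATCH_RETRIEVE_MAX. Below is a minimal, self-contained sketch of that pattern (not the committed code); the fetchBatch() helper is a hypothetical stand-in for the RawStore call, and the batch size is hard-coded for illustration:

  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.HashSet;
  import java.util.List;

  public class BatchedRetrievalSketch {
    // Hypothetical stand-in for RawStore.getTableObjectsByName(dbName, names).
    static List<String> fetchBatch(String dbName, List<String> names) {
      return new ArrayList<String>(names);
    }

    public static void main(String[] args) {
      int batchSize = 2; // plays the role of METASTORE_BATCH_RETRIEVE_MAX
      // De-duplicate first, since each batch only avoids duplicates within itself.
      List<String> names = new ArrayList<String>(new HashSet<String>(
          Arrays.asList("table1", "table2", "table2", "table3", "table4", "table5")));

      List<String> results = new ArrayList<String>();
      int start = 0;
      while (start < names.size()) {
        int end = Math.min(start + batchSize, names.size());
        results.addAll(fetchBatch("db", names.subList(start, end)));
        start = end;
      }
      System.out.println(results); // five distinct tables, fetched in three batches
    }
  }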


http://git-wip-us.apache.org/repos/asf/hive/blob/178b8d17/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index dffeb34..06c6b76 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -99,6 +100,8 @@ public abstract class TestHiveMetaStore extends TestCase {
     hiveConf.set("hive.key2", "http://www.example.com");
     hiveConf.set("hive.key3", "");
     hiveConf.set("hive.key4", "0");
+
+    hiveConf.setIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX, 2);
   }
 
   public void testNameMethods() {
@@ -1330,7 +1333,7 @@ public abstract class TestHiveMetaStore extends TestCase {
       tableNames.add(tblName2);
       List<Table> foundTables = client.getTableObjectsByName(dbName, tableNames);
 
-      assertEquals(foundTables.size(), 2);
+      assertEquals(2, foundTables.size());
       for (Table t: foundTables) {
         if (t.getTableName().equals(tblName2)) {
           assertEquals(t.getSd().getLocation(), tbl2.getSd().getLocation());
@@ -2700,6 +2703,26 @@ public abstract class TestHiveMetaStore extends TestCase {
     return typ1;
   }
 
+  /**
+   * Creates a simple table under the specified database.
+   * @param dbName    the database name that the table will be created under
+   * @param tableName the table name to be created
+   * @throws Exception
+   */
+  private void createTable(String dbName, String tableName)
+      throws Exception {
+    List<FieldSchema> columns = new ArrayList<FieldSchema>();
+    columns.add(new FieldSchema("foo", "string", ""));
+    columns.add(new FieldSchema("bar", "string", ""));
+
+    Map<String, String> serdParams = new HashMap<String, String>();
+    serdParams.put(serdeConstants.SERIALIZATION_FORMAT, "1");
+
+    StorageDescriptor sd =  createStorageDescriptor(tableName, columns, null, serdParams);
+
+    createTable(dbName, tableName, null, null, null, sd, 0);
+  }
+
   private Table createTable(String dbName, String tblName, String owner,
       Map<String,String> tableParams, Map<String, String> partitionKeys,
       StorageDescriptor sd, int lastAccessTime) throws Exception {
@@ -2852,6 +2875,38 @@ public abstract class TestHiveMetaStore extends TestCase {
 
   }
 
+  /**
+   * Tests that table objects can be retrieved in batches.
+   * @throws Exception
+   */
+  @Test
+  public void testGetTableObjects() throws Exception {
+    String dbName = "db";
+    List<String> tableNames = Arrays.asList("table1", "table2", "table3", "table4", "table5");
+
+    // Setup
+    silentDropDatabase(dbName);
+
+    Database db = new Database();
+    db.setName(dbName);
+    client.createDatabase(db);
+    for (String tableName : tableNames) {
+      createTable(dbName, tableName);
+    }
+
+    // Test
+    List<Table> tableObjs = client.getTableObjectsByName(dbName, tableNames);
+
+    // Verify
+    assertEquals(tableNames.size(), tableObjs.size());
+    for(Table table : tableObjs) {
+      assertTrue(tableNames.contains(table.getTableName().toLowerCase()));
+    }
+
+    // Cleanup
+    client.dropDatabase(dbName, true, true, true);
+  }
+
   private void checkDbOwnerType(String dbName, String ownerName, PrincipalType ownerType)
       throws NoSuchObjectException, MetaException, TException {
     Database db = client.getDatabase(dbName);

http://git-wip-us.apache.org/repos/asf/hive/blob/178b8d17/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 0edf11f..ee2cea0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableListMultimap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimaps;
+
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -186,6 +187,7 @@ import org.apache.hadoop.hive.thrift.TUGIContainingTransport;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.HiveStringUtils;
 import org.apache.thrift.TException;
 import org.apache.thrift.TProcessor;
 import org.apache.thrift.protocol.TBinaryProtocol;
@@ -203,6 +205,7 @@ import org.apache.thrift.transport.TTransport;
 import org.apache.thrift.transport.TTransportFactory;
 
 import javax.jdo.JDOException;
+
 import java.io.IOException;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
@@ -1831,9 +1834,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
     /**
      * Gets multiple tables from the hive metastore.
      *
-     * @param dbname
+     * @param dbName
      *          The name of the database in which the tables reside
-     * @param names
+     * @param tableNames
      *          The names of the tables to get.
      *
     * @return A list of tables whose names are in the list "names" and
@@ -1845,21 +1848,44 @@ public class HiveMetaStore extends ThriftHiveMetastore {
      * @throws UnknownDBException
      */
     @Override
-    public List<Table> get_table_objects_by_name(final String dbname, final List<String> names)
+    public List<Table> get_table_objects_by_name(final String dbName, final List<String> tableNames)
         throws MetaException, InvalidOperationException, UnknownDBException {
-      List<Table> tables = null;
-      startMultiTableFunction("get_multi_table", dbname, names);
+      List<Table> tables = new ArrayList<Table>();
+      startMultiTableFunction("get_multi_table", dbName, tableNames);
       Exception ex = null;
-      try {
+      int tableBatchSize = HiveConf.getIntVar(hiveConf,
+          ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
 
-        if (dbname == null || dbname.isEmpty()) {
+      try {
+        if (dbName == null || dbName.isEmpty()) {
           throw new UnknownDBException("DB name is null or empty");
         }
-        if (names == null)
+        if (tableNames == null)
         {
-          throw new InvalidOperationException(dbname + " cannot find null tables");
+          throw new InvalidOperationException(dbName + " cannot find null tables");
+        }
+
+        // The list of table names could contain duplicates. RawStore.getTableObjectsByName()
+        // only guarantees returning no duplicate table objects in one batch. If we need
+        // to break into multiple batches, remove duplicates first.
+        List<String> distinctTableNames = tableNames;
+        if (distinctTableNames.size() > tableBatchSize) {
+          List<String> lowercaseTableNames = new ArrayList<String>();
+          for (String tableName : tableNames) {
+            lowercaseTableNames.add(HiveStringUtils.normalizeIdentifier(tableName));
+          }
+          distinctTableNames = new ArrayList<String>(new HashSet<String>(lowercaseTableNames));
+        }
+
+        RawStore ms = getMS();
+        int startIndex = 0;
+        // Retrieve the tables from the metastore in batches. Some databases like
+        // Oracle cannot have over 1000 expressions in an IN list
+        while (startIndex < distinctTableNames.size()) {
+          int endIndex = Math.min(startIndex + tableBatchSize, distinctTableNames.size());
+          tables.addAll(ms.getTableObjectsByName(dbName, distinctTableNames.subList(startIndex, endIndex)));
+          startIndex = endIndex;
         }
-        tables = getMS().getTableObjectsByName(dbname, names);
       } catch (Exception e) {
         ex = e;
         if (e instanceof MetaException) {
@@ -1872,7 +1898,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
           throw newMetaException(e);
         }
       } finally {
-        endFunction("get_multi_table", tables != null, ex, join(names, ","));
+        endFunction("get_multi_table", tables != null, ex, join(tableNames, ","));
       }
       return tables;
     }
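
For reference, a rough client-side sketch (not part of the commit) of how the batched path gets exercised. It assumes a reachable embedded metastore, so the client's HiveConf is also the one HiveMetaStore reads, and a database "db" that already holds the five tables; with ConfVars.METASTORE_BATCH_RETRIEVE_MAX lowered to 2, the server-side loop above splits the request into three RawStore calls:

  import java.util.Arrays;
  import java.util.List;

  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
  import org.apache.hadoop.hive.metastore.api.Table;

  public class GetTablesInBatchesExample {
    public static void main(String[] args) throws Exception {
      HiveConf conf = new HiveConf();
      // Lower the batch size so even a small request spans multiple batches.
      conf.setIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX, 2);
      HiveMetaStoreClient client = new HiveMetaStoreClient(conf);

      List<String> names = Arrays.asList("table1", "table2", "table3", "table4", "table5");
      List<Table> tables = client.getTableObjectsByName("db", names);
      System.out.println("retrieved " + tables.size() + " tables");
      client.close();
    }
  }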