You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2015/07/20 22:12:49 UTC
[39/50] [abbrv] hive git commit: HIVE-11255 - get_table_objects_by_name() in HiveMetaStore.java needs to retrieve table objects in multiple batches (Aihua Xu, reviewed by Chao Sun)
HIVE-11255 - get_table_objects_by_name() in HiveMetaStore.java needs to retrieve table objects in multiple batches (Aihua Xu, reviewed by Chao Sun)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/178b8d17
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/178b8d17
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/178b8d17
Branch: refs/heads/spark
Commit: 178b8d17fcaa5293dbe75eff5d39871a47f51c81
Parents: 854950b
Author: Aihua Xu <ai...@gmail.com>
Authored: Thu Jul 16 15:03:40 2015 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Thu Jul 16 15:03:40 2015 -0700
----------------------------------------------------------------------
.../hive/metastore/TestHiveMetaStore.java | 57 +++++++++++++++++++-
.../hadoop/hive/metastore/HiveMetaStore.java | 48 +++++++++++++----
2 files changed, 93 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/178b8d17/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index dffeb34..06c6b76 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -99,6 +100,8 @@ public abstract class TestHiveMetaStore extends TestCase {
hiveConf.set("hive.key2", "http://www.example.com");
hiveConf.set("hive.key3", "");
hiveConf.set("hive.key4", "0");
+
+ hiveConf.setIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX, 2);
}
public void testNameMethods() {
@@ -1330,7 +1333,7 @@ public abstract class TestHiveMetaStore extends TestCase {
tableNames.add(tblName2);
List<Table> foundTables = client.getTableObjectsByName(dbName, tableNames);
- assertEquals(foundTables.size(), 2);
+ assertEquals(2, foundTables.size());
for (Table t: foundTables) {
if (t.getTableName().equals(tblName2)) {
assertEquals(t.getSd().getLocation(), tbl2.getSd().getLocation());
@@ -2700,6 +2703,26 @@ public abstract class TestHiveMetaStore extends TestCase {
return typ1;
}
+ /**
+ * Creates a simple table under specified database
+ * @param dbName the database name that the table will be created under
+ * @param tableName the table name to be created
+ * @throws Exception
+ */
+ private void createTable(String dbName, String tableName)
+ throws Exception {
+ List<FieldSchema> columns = new ArrayList<FieldSchema>();
+ columns.add(new FieldSchema("foo", "string", ""));
+ columns.add(new FieldSchema("bar", "string", ""));
+
+ Map<String, String> serdParams = new HashMap<String, String>();
+ serdParams.put(serdeConstants.SERIALIZATION_FORMAT, "1");
+
+ StorageDescriptor sd = createStorageDescriptor(tableName, columns, null, serdParams);
+
+ createTable(dbName, tableName, null, null, null, sd, 0);
+ }
+
private Table createTable(String dbName, String tblName, String owner,
Map<String,String> tableParams, Map<String, String> partitionKeys,
StorageDescriptor sd, int lastAccessTime) throws Exception {
@@ -2852,6 +2875,38 @@ public abstract class TestHiveMetaStore extends TestCase {
}
+ /**
+ * Test table objects can be retrieved in batches
+ * @throws Exception
+ */
+ @Test
+ public void testGetTableObjects() throws Exception {
+ String dbName = "db";
+ List<String> tableNames = Arrays.asList("table1", "table2", "table3", "table4", "table5");
+
+ // Setup
+ silentDropDatabase(dbName);
+
+ Database db = new Database();
+ db.setName(dbName);
+ client.createDatabase(db);
+ for (String tableName : tableNames) {
+ createTable(dbName, tableName);
+ }
+
+ // Test
+ List<Table> tableObjs = client.getTableObjectsByName(dbName, tableNames);
+
+ // Verify
+ assertEquals(tableNames.size(), tableObjs.size());
+ for(Table table : tableObjs) {
+ assertTrue(tableNames.contains(table.getTableName().toLowerCase()));
+ }
+
+ // Cleanup
+ client.dropDatabase(dbName, true, true, true);
+ }
+
private void checkDbOwnerType(String dbName, String ownerName, PrincipalType ownerType)
throws NoSuchObjectException, MetaException, TException {
Database db = client.getDatabase(dbName);
http://git-wip-us.apache.org/repos/asf/hive/blob/178b8d17/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 0edf11f..ee2cea0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -26,6 +26,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimaps;
+
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -186,6 +187,7 @@ import org.apache.hadoop.hive.thrift.TUGIContainingTransport;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.HiveStringUtils;
import org.apache.thrift.TException;
import org.apache.thrift.TProcessor;
import org.apache.thrift.protocol.TBinaryProtocol;
@@ -203,6 +205,7 @@ import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportFactory;
import javax.jdo.JDOException;
+
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
@@ -1831,9 +1834,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
/**
* Gets multiple tables from the hive metastore.
*
- * @param dbname
+ * @param dbName
* The name of the database in which the tables reside
- * @param names
+ * @param tableNames
* The names of the tables to get.
*
* @return A list of tables whose names are in the list "names" and
@@ -1845,21 +1848,44 @@ public class HiveMetaStore extends ThriftHiveMetastore {
* @throws UnknownDBException
*/
@Override
- public List<Table> get_table_objects_by_name(final String dbname, final List<String> names)
+ public List<Table> get_table_objects_by_name(final String dbName, final List<String> tableNames)
throws MetaException, InvalidOperationException, UnknownDBException {
- List<Table> tables = null;
- startMultiTableFunction("get_multi_table", dbname, names);
+ List<Table> tables = new ArrayList<Table>();
+ startMultiTableFunction("get_multi_table", dbName, tableNames);
Exception ex = null;
- try {
+ int tableBatchSize = HiveConf.getIntVar(hiveConf,
+ ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
- if (dbname == null || dbname.isEmpty()) {
+ try {
+ if (dbName == null || dbName.isEmpty()) {
throw new UnknownDBException("DB name is null or empty");
}
- if (names == null)
+ if (tableNames == null)
{
- throw new InvalidOperationException(dbname + " cannot find null tables");
+ throw new InvalidOperationException(dbName + " cannot find null tables");
+ }
+
+ // The list of table names could contain duplicates. RawStore.getTableObjectsByName()
+ // only guarantees returning no duplicate table objects in one batch. If we need
+ // to break into multiple batches, remove duplicates first.
+ List<String> distinctTableNames = tableNames;
+ if (distinctTableNames.size() > tableBatchSize) {
+ List<String> lowercaseTableNames = new ArrayList<String>();
+ for (String tableName : tableNames) {
+ lowercaseTableNames.add(HiveStringUtils.normalizeIdentifier(tableName));
+ }
+ distinctTableNames = new ArrayList<String>(new HashSet<String>(lowercaseTableNames));
+ }
+
+ RawStore ms = getMS();
+ int startIndex = 0;
+ // Retrieve the tables from the metastore in batches. Some databases like
+ // Oracle cannot have over 1000 expressions in an in-list
+ while (startIndex < distinctTableNames.size()) {
+ int endIndex = Math.min(startIndex + tableBatchSize, distinctTableNames.size());
+ tables.addAll(ms.getTableObjectsByName(dbName, distinctTableNames.subList(startIndex, endIndex)));
+ startIndex = endIndex;
}
- tables = getMS().getTableObjectsByName(dbname, names);
} catch (Exception e) {
ex = e;
if (e instanceof MetaException) {
@@ -1872,7 +1898,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
throw newMetaException(e);
}
} finally {
- endFunction("get_multi_table", tables != null, ex, join(names, ","));
+ endFunction("get_multi_table", tables != null, ex, join(tableNames, ","));
}
return tables;
}