Posted to commits@hive.apache.org by th...@apache.org on 2015/08/04 22:25:16 UTC

hive git commit: HIVE-11407 : JDBC DatabaseMetaData.getTables with large no of tables call leads to HS2 OOM (Sushanth Sowmyan via Thejas Nair)

Repository: hive
Updated Branches:
  refs/heads/master c7e1d34b6 -> 46739a6a2


HIVE-11407 : JDBC DatabaseMetaData.getTables with large no of tables call leads to HS2 OOM  (Sushanth Sowmyan via Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/46739a6a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/46739a6a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/46739a6a

Branch: refs/heads/master
Commit: 46739a6a2de6bc672fda094d5505060a21a22179
Parents: c7e1d34
Author: Thejas Nair <th...@hortonworks.com>
Authored: Tue Aug 4 13:25:02 2015 -0700
Committer: Thejas Nair <th...@hortonworks.com>
Committed: Tue Aug 4 13:25:02 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/metadata/TableIterable.java  | 104 +++++++++++++++++++
 .../cli/operation/GetColumnsOperation.java      |  10 +-
 .../cli/operation/GetTablesOperation.java       |   7 +-
 3 files changed, 118 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
new file mode 100644
index 0000000..f3af39b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TException;
+
+/**
+ * Use this to get Table objects for a table list. It provides an iterator
+ * over the resulting Table objects. It batches the calls to
+ * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with
+ * embedded metastore) or MetaStore server (if HS2 is using remote metastore).
+ *
+ */
+public class TableIterable implements Iterable<Table> {
+
+  @Override
+  public Iterator<Table> iterator() {
+    return new Iterator<Table>() {
+
+      private final Iterator<String> tableNamesIter = tableNames.iterator();
+      private Iterator<org.apache.hadoop.hive.metastore.api.Table> batchIter = null;
+
+      @Override
+      public boolean hasNext() {
+        return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext();
+      }
+
+      @Override
+      public Table next() {
+        if ((batchIter == null) || !batchIter.hasNext()) {
+          getNextBatch();
+        }
+        return batchIter.next();
+      }
+
+      private void getNextBatch() {
+        // get next batch of table names in this list
+        List<String> nameBatch = new ArrayList<String>();
+        int batch_counter = 0;
+        while (batch_counter < batch_size && tableNamesIter.hasNext()) {
+          nameBatch.add(tableNamesIter.next());
+          batch_counter++;
+        }
+        // get the Table objects for this batch of table names and get iterator
+        // on it
+        try {
+          try {
+            batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator();
+          } catch (TException e) {
+            throw new HiveException(e);
+          }
+        } catch (HiveException e) {
+          throw new RuntimeException(e);
+        }
+      }
+
+      @Override
+      public void remove() {
+        throw new IllegalStateException(
+            "TableIterable is a read-only iterable and remove() is unsupported");
+      }
+    };
+  }
+
+  private final IMetaStoreClient msc;
+  private final String dbname;
+  private final List<String> tableNames;
+  private final int batch_size;
+
+  /**
+   * Constructor that fetches Table objects for the given table name list from
+   * the given metastore client and db name, batch_size names at a time.
+   */
+  public TableIterable(IMetaStoreClient msc, String dbname, List<String> tableNames, int batch_size)
+      throws TException {
+    this.msc = msc;
+    this.dbname = dbname;
+    this.tableNames = tableNames;
+    this.batch_size = batch_size;
+  }
+
+}
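
For illustration only (not part of this patch): a minimal sketch of how
TableIterable might be consumed, assuming an already-connected
IMetaStoreClient named "msc"; the database name, table pattern and batch
size below are placeholders.

  import java.util.List;
  import org.apache.hadoop.hive.metastore.IMetaStoreClient;
  import org.apache.hadoop.hive.metastore.api.Table;
  import org.apache.hadoop.hive.ql.metadata.TableIterable;
  import org.apache.thrift.TException;

  void printTables(IMetaStoreClient msc) throws TException {
    // List table names first; only the names are held in memory here.
    List<String> tableNames = msc.getTables("default", "*");
    int batchSize = 300; // e.g. the value of hive.metastore.batch.retrieve.max
    // Table objects are fetched batchSize names at a time, so memory use stays
    // bounded even when tableNames is very large.
    for (Table table : new TableIterable(msc, "default", tableNames, batchSize)) {
      System.out.println(table.getTableName());
    }
  }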

http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
index 309f10f..8ecdc2e 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
@@ -30,10 +30,12 @@ import java.util.regex.Pattern;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.TableIterable;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hive.service.cli.ColumnDescriptor;
 import org.apache.hive.service.cli.FetchOrientation;
 import org.apache.hive.service.cli.HiveSQLException;
@@ -153,11 +155,15 @@ public class GetColumnsOperation extends MetadataOperation {
         authorizeMetaGets(HiveOperationType.GET_COLUMNS, privObjs, cmdStr);
       }
 
+      int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
       for (Entry<String, List<String>> dbTabs : db2Tabs.entrySet()) {
         String dbName = dbTabs.getKey();
         List<String> tableNames = dbTabs.getValue();
-        for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) {
-          TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName, table.getTableName()));
+
+        for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) {
+
+          TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName,
+              table.getTableName()));
           for (ColumnDescriptor column : schema.getColumnDescriptors()) {
             if (columnPattern != null && !columnPattern.matcher(column.getName()).matches()) {
               continue;

http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
index 0e2fdc6..296280f 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
@@ -22,11 +22,14 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.TableIterable;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hive.service.cli.FetchOrientation;
 import org.apache.hive.service.cli.HiveSQLException;
 import org.apache.hive.service.cli.OperationState;
@@ -88,9 +91,11 @@ public class GetTablesOperation extends MetadataOperation {
       }
 
       String tablePattern = convertIdentifierPattern(tableName, true);
+      int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
+
       for (String dbName : metastoreClient.getDatabases(schemaPattern)) {
         List<String> tableNames = metastoreClient.getTables(dbName, tablePattern);
-        for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) {
+        for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) {
           Object[] rowData = new Object[] {
               DEFAULT_HIVE_CATALOG,
               table.getDbName(),
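
For context (illustrative only; the JDBC URL and credentials are
placeholders): the client-side metadata call whose handling this patch
changes. Before this change, HS2 fetched every matching Table object from
the metastore in a single getTableObjectsByName call while serving it; with
TableIterable the objects are now retrieved in bounded batches.

  import java.sql.Connection;
  import java.sql.DatabaseMetaData;
  import java.sql.DriverManager;
  import java.sql.ResultSet;

  void listAllTables() throws Exception {
    Connection conn = DriverManager.getConnection(
        "jdbc:hive2://localhost:10000/default", "user", "");
    DatabaseMetaData meta = conn.getMetaData();
    // Matches every table in every schema -- the case that could previously
    // drive HS2 (or the metastore server) out of memory.
    try (ResultSet tables = meta.getTables(null, "%", "%", null)) {
      while (tables.next()) {
        System.out.println(tables.getString("TABLE_NAME"));
      }
    }
    conn.close();
  }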