You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by th...@apache.org on 2015/08/04 22:25:16 UTC
hive git commit: HIVE-11407 : JDBC DatabaseMetaData.getTables with
large no of tables call leads to HS2 OOM (Sushanth Sowmyan via Thejas Nair)
Repository: hive
Updated Branches:
refs/heads/master c7e1d34b6 -> 46739a6a2
HIVE-11407 : JDBC DatabaseMetaData.getTables with large no of tables call leads to HS2 OOM (Sushanth Sowmyan via Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/46739a6a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/46739a6a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/46739a6a
Branch: refs/heads/master
Commit: 46739a6a2de6bc672fda094d5505060a21a22179
Parents: c7e1d34
Author: Thejas Nair <th...@hortonworks.com>
Authored: Tue Aug 4 13:25:02 2015 -0700
Committer: Thejas Nair <th...@hortonworks.com>
Committed: Tue Aug 4 13:25:02 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/metadata/TableIterable.java | 104 +++++++++++++++++++
.../cli/operation/GetColumnsOperation.java | 10 +-
.../cli/operation/GetTablesOperation.java | 7 +-
3 files changed, 118 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
new file mode 100644
index 0000000..f3af39b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.thrift.TException;
+
+/**
+ * Use this to get Table objects for a list of table names in one database.
+ * It provides an iterator over the resulting Table objects and batches the
+ * calls to IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2
+ * (with embedded metastore) or MetaStore server (if HS2 is using remote
+ * metastore).
+ *
+ * Batches are fetched lazily, so both hasNext() and next() of the returned
+ * iterator may call the metastore. A failure of that call is rethrown as a
+ * RuntimeException whose cause is a HiveException wrapping the TException.
+ */
+public class TableIterable implements Iterable<Table> {
+
+  /**
+   * Returns a fresh read-only iterator positioned before the first table
+   * name. Each iterator fetches its own batches.
+   */
+  @Override
+  public Iterator<Table> iterator() {
+    return new Iterator<Table>() {
+
+      private final Iterator<String> tableNamesIter = tableNames.iterator();
+      private Iterator<org.apache.hadoop.hive.metastore.api.Table> batchIter = null;
+
+      @Override
+      public boolean hasNext() {
+        // Keep fetching until a batch actually yields a Table or the names run
+        // out. A batch can presumably come back empty or short (e.g. tables
+        // dropped after their names were listed); answering "true" merely
+        // because names remain would make the following next() fail.
+        while (((batchIter == null) || !batchIter.hasNext()) && tableNamesIter.hasNext()) {
+          getNextBatch();
+        }
+        return (batchIter != null) && batchIter.hasNext();
+      }
+
+      @Override
+      public Table next() {
+        // hasNext() loads the next non-empty batch if one is needed.
+        if (!hasNext()) {
+          throw new java.util.NoSuchElementException(
+              "No more tables to iterate over in database " + dbname);
+        }
+        return batchIter.next();
+      }
+
+      /**
+       * Consumes up to one batch of table names and replaces batchIter with an
+       * iterator over the Table objects the metastore returns for them.
+       */
+      private void getNextBatch() {
+        // Always take at least one name so that a non-positive batch_size
+        // cannot stall the iteration.
+        int limit = Math.max(1, batch_size);
+        List<String> nameBatch = new ArrayList<String>();
+        while (nameBatch.size() < limit && tableNamesIter.hasNext()) {
+          nameBatch.add(tableNamesIter.next());
+        }
+        // get the Table objects for this batch of table names and get iterator
+        // on it; Iterator methods cannot throw checked exceptions, so the
+        // failure is wrapped (cause chain: RuntimeException -> HiveException
+        // -> TException).
+        try {
+          try {
+            batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator();
+          } catch (TException e) {
+            throw new HiveException(e);
+          }
+        } catch (HiveException e) {
+          throw new RuntimeException(e);
+        }
+      }
+
+      @Override
+      public void remove() {
+        // UnsupportedOperationException is what the Iterator contract
+        // specifies for an iterator that does not support removal.
+        throw new UnsupportedOperationException(
+            "TableIterable is a read-only iterable and remove() is unsupported");
+      }
+    };
+  }
+
+  private final IMetaStoreClient msc;
+  private final String dbname;
+  private final List<String> tableNames;
+  private final int batch_size;
+
+  /**
+   * Primary constructor. Nothing is fetched here; Table objects are retrieved
+   * in batches while iterating.
+   *
+   * @param msc metastore client used to fetch the Table objects
+   * @param dbname name of the database the tables belong to
+   * @param tableNames names of the tables to fetch; the list is not copied
+   * @param batch_size maximum number of tables requested per metastore call;
+   *          values below 1 are treated as 1
+   * @throws TException declared for source compatibility with existing
+   *           callers; this constructor itself makes no metastore call
+   */
+  public TableIterable(IMetaStoreClient msc, String dbname, List<String> tableNames, int batch_size)
+      throws TException {
+    this.msc = msc;
+    this.dbname = dbname;
+    this.tableNames = tableNames;
+    this.batch_size = batch_size;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
index 309f10f..8ecdc2e 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java
@@ -30,10 +30,12 @@ import java.util.regex.Pattern;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.TableIterable;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hive.service.cli.ColumnDescriptor;
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
@@ -153,11 +155,15 @@ public class GetColumnsOperation extends MetadataOperation {
authorizeMetaGets(HiveOperationType.GET_COLUMNS, privObjs, cmdStr);
}
+ int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
for (Entry<String, List<String>> dbTabs : db2Tabs.entrySet()) {
String dbName = dbTabs.getKey();
List<String> tableNames = dbTabs.getValue();
- for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) {
- TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName, table.getTableName()));
+
+ for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) {
+
+ TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName,
+ table.getTableName()));
for (ColumnDescriptor column : schema.getColumnDescriptors()) {
if (columnPattern != null && !columnPattern.matcher(column.getName()).matches()) {
continue;
http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
index 0e2fdc6..296280f 100644
--- a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
+++ b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java
@@ -22,11 +22,14 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.metadata.TableIterable;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.OperationState;
@@ -88,9 +91,11 @@ public class GetTablesOperation extends MetadataOperation {
}
String tablePattern = convertIdentifierPattern(tableName, true);
+ int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
+
for (String dbName : metastoreClient.getDatabases(schemaPattern)) {
List<String> tableNames = metastoreClient.getTables(dbName, tablePattern);
- for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) {
+ for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) {
Object[] rowData = new Object[] {
DEFAULT_HIVE_CATALOG,
table.getDbName(),