You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/04/25 23:14:31 UTC
[2/2] hive git commit: HIVE-18986: Table rename will run
java.lang.StackOverflowError in dataNucleus if the table contains large
number of columns (Aihua Xu, reviewed by Yongzhi Chen)
HIVE-18986: Table rename will run java.lang.StackOverflowError in dataNucleus if the table contains large number of columns (Aihua Xu, reviewed by Yongzhi Chen)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f30efbeb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f30efbeb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f30efbeb
Branch: refs/heads/master
Commit: f30efbebf2ff85c55a5d9e3e2f86e0a51341df78
Parents: 11b0d85
Author: Aihua Xu <ai...@apache.org>
Authored: Wed Apr 18 17:05:08 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Apr 25 16:10:30 2018 -0700
----------------------------------------------------------------------
.../queries/clientpositive/alter_rename_table.q | 12 ++-
.../clientpositive/alter_rename_table.q.out | 88 ++++++++++++++++++++
.../apache/hadoop/hive/metastore/Batchable.java | 86 +++++++++++++++++++
.../hive/metastore/MetaStoreDirectSql.java | 61 ++------------
.../hadoop/hive/metastore/ObjectStore.java | 45 ++++++----
.../hive/metastore/conf/MetastoreConf.java | 5 ++
6 files changed, 227 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/queries/clientpositive/alter_rename_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_rename_table.q b/ql/src/test/queries/clientpositive/alter_rename_table.q
index 53fb230..bcf6ad5 100644
--- a/ql/src/test/queries/clientpositive/alter_rename_table.q
+++ b/ql/src/test/queries/clientpositive/alter_rename_table.q
@@ -36,4 +36,14 @@ create table source.src1 like default.src;
load data local inpath '../../data/files/kv1.txt' overwrite into table source.src;
ALTER TABLE source.src RENAME TO target.src1;
-select * from target.src1 tablesample (10 rows);
\ No newline at end of file
+select * from target.src1 tablesample (10 rows);
+
+set metastore.rawstore.batch.size=1;
+set metastore.try.direct.sql=false;
+
+create table source.src2 like default.src;
+load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2;
+ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS;
+ALTER TABLE source.src2 RENAME TO target.src3;
+DESC FORMATTED target.src3;
+select * from target.src3 tablesample (10 rows);
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/results/clientpositive/alter_rename_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_rename_table.q.out b/ql/src/test/results/clientpositive/alter_rename_table.q.out
index 732d8a2..9ac8fd2 100644
--- a/ql/src/test/results/clientpositive/alter_rename_table.q.out
+++ b/ql/src/test/results/clientpositive/alter_rename_table.q.out
@@ -261,3 +261,91 @@ POSTHOOK: Input: target@src1
278 val_278
98 val_98
484 val_484
+PREHOOK: query: create table source.src2 like default.src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:source
+PREHOOK: Output: source@src2
+POSTHOOK: query: create table source.src2 like default.src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:source
+POSTHOOK: Output: source@src2
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: source@src2
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: source@src2
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: source@src2
+PREHOOK: Output: source@src2
+POSTHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: source@src2
+POSTHOOK: Output: source@src2
+POSTHOOK: Output: target@src3
+PREHOOK: query: DESC FORMATTED target.src3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: target@src3
+POSTHOOK: query: DESC FORMATTED target.src3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: target@src3
+# col_name data_type comment
+key string default
+value string default
+
+# Detailed Table Information
+Database: target
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+#### A masked pattern was here ####
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from target.src3 tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: target@src3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target.src3 tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: target@src3
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
new file mode 100644
index 0000000..7e488a5
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.List;
+import javax.jdo.Query;
+
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class to add the batch process for DirectSQL or RawStore queries.
+ * 1. Provide the implementation of run() to process one batch
+ * 2. Call Batchable.runBatched() to process the whole dataset
+ *
+ * I: input type, R: result type
+ */
+public abstract class Batchable<I, R> {
+ private static final Logger LOG = LoggerFactory.getLogger(Batchable.class);
+ public static final int NO_BATCHING = -1;
+
+ private List<Query> queries = null;
+ public abstract List<R> run(List<I> input) throws MetaException;
+
+ public void addQueryAfterUse(Query query) {
+ if (queries == null) {
+ queries = new ArrayList<Query>(1);
+ }
+ queries.add(query);
+ }
+ protected void addQueryAfterUse(Batchable<?, ?> b) {
+ if (b.queries == null) {
+ return;
+ }
+ if (queries == null) {
+ queries = new ArrayList<Query>(1);
+ }
+ queries.addAll(b.queries);
+ }
+ public void closeAllQueries() {
+ for (Query q : queries) {
+ try {
+ q.closeAll();
+ } catch (Throwable t) {
+ LOG.error("Failed to close a query", t);
+ }
+ }
+ }
+
+ public static <I, R> List<R> runBatched(
+ final int batchSize,
+ List<I> input,
+ Batchable<I, R> runnable) throws MetaException {
+ if (batchSize == NO_BATCHING || batchSize >= input.size()) {
+ return runnable.run(input);
+ }
+ List<R> result = new ArrayList<R>(input.size());
+ for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
+ toIndex = Math.min(fromIndex + batchSize, input.size());
+ List<I> batchedInput = input.subList(fromIndex, toIndex);
+ List<R> batchedOutput = runnable.run(batchedInput);
+ if (batchedOutput != null) {
+ result.addAll(batchedOutput);
+ }
+ }
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 997f5fd..4e0e887 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -455,7 +455,7 @@ class MetaStoreDirectSql {
if (partNames.isEmpty()) {
return Collections.emptyList();
}
- return runBatched(partNames, new Batchable<String, Partition>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, Partition>() {
@Override
public List<Partition> run(List<String> input) throws MetaException {
String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
@@ -596,7 +596,7 @@ class MetaStoreDirectSql {
}
// Get full objects. For Oracle/etc. do it in batches.
- List<Partition> result = runBatched(sqlResult, new Batchable<Object, Partition>() {
+ List<Partition> result = Batchable.runBatched(batchSize, sqlResult, new Batchable<Object, Partition>() {
@Override
public List<Partition> run(List<Object> input) throws MetaException {
return getPartitionsFromPartitionIds(catNameLcase, dbNameLcase, tblNameLcase, isView,
@@ -1374,7 +1374,7 @@ class MetaStoreDirectSql {
return ensureList(qResult);
}
};
- List<Object[]> list = runBatched(colNames, b);
+ List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
if (list.isEmpty()) {
return null;
}
@@ -1460,10 +1460,10 @@ class MetaStoreDirectSql {
+ " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+ " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
+ " group by \"PARTITION_NAME\"";
- List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+ List<Long> allCounts = Batchable.runBatched(batchSize, colNames, new Batchable<String, Long>() {
@Override
public List<Long> run(final List<String> inputColName) throws MetaException {
- return runBatched(partNames, new Batchable<String, Long>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, Long>() {
@Override
public List<Long> run(List<String> inputPartNames) throws MetaException {
long partsFound = 0;
@@ -1503,10 +1503,10 @@ class MetaStoreDirectSql {
final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
final boolean areAllPartsFound = (partsFound == partNames.size());
- return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+ return Batchable.runBatched(batchSize, colNames, new Batchable<String, ColumnStatisticsObj>() {
@Override
public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
- return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, ColumnStatisticsObj>() {
@Override
public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames,
@@ -1918,13 +1918,13 @@ class MetaStoreDirectSql {
}
};
try {
- return runBatched(partNames, b2);
+ return Batchable.runBatched(batchSize, partNames, b2);
} finally {
addQueryAfterUse(b2);
}
}
};
- List<Object[]> list = runBatched(colNames, b);
+ List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(
Math.min(list.size(), partNames.size()));
@@ -2027,49 +2027,6 @@ class MetaStoreDirectSql {
}
- private static abstract class Batchable<I, R> {
- private List<Query> queries = null;
- public abstract List<R> run(List<I> input) throws MetaException;
- public void addQueryAfterUse(Query query) {
- if (queries == null) {
- queries = new ArrayList<Query>(1);
- }
- queries.add(query);
- }
- protected void addQueryAfterUse(Batchable<?, ?> b) {
- if (b.queries == null) return;
- if (queries == null) {
- queries = new ArrayList<Query>(1);
- }
- queries.addAll(b.queries);
- }
- public void closeAllQueries() {
- for (Query q : queries) {
- try {
- q.closeAll();
- } catch (Throwable t) {
- LOG.error("Failed to close a query", t);
- }
- }
- }
- }
-
- private <I,R> List<R> runBatched(List<I> input, Batchable<I, R> runnable) throws MetaException {
- if (batchSize == NO_BATCHING || batchSize >= input.size()) {
- return runnable.run(input);
- }
- List<R> result = new ArrayList<R>(input.size());
- for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
- toIndex = Math.min(fromIndex + batchSize, input.size());
- List<I> batchedInput = input.subList(fromIndex, toIndex);
- List<R> batchedOutput = runnable.run(batchedInput);
- if (batchedOutput != null) {
- result.addAll(batchedOutput);
- }
- }
- return result;
- }
-
public List<SQLForeignKey> getForeignKeys(String catName, String parent_db_name,
String parent_tbl_name, String foreign_db_name,
String foreign_tbl_name) throws MetaException {
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 184ecb6..1abd99d 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -244,6 +244,7 @@ public class ObjectStore implements RawStore, Configurable {
private static Properties prop = null;
private static PersistenceManagerFactory pmf = null;
private static boolean forTwoMetastoreTesting = false;
+ private int batchSize = Batchable.NO_BATCHING;
private static final DateTimeFormatter YMDHMS_FORMAT = DateTimeFormatter.ofPattern(
"yyyy_MM_dd_HH_mm_ss");
@@ -385,6 +386,8 @@ public class ObjectStore implements RawStore, Configurable {
directSqlErrors = Metrics.getOrCreateCounter(MetricsConstants.DIRECTSQL_ERRORS);
}
+ this.batchSize = MetastoreConf.getIntVar(conf, ConfVars.RAWSTORE_PARTITION_BATCH_SIZE);
+
if (!isInitialized) {
throw new RuntimeException(
"Unable to create persistence manager. Check dss.log for details");
@@ -8028,25 +8031,33 @@ public class ObjectStore implements RawStore, Configurable {
try {
openTransaction();
- List<MTableColumnStatistics> result = null;
validateTableCols(table, colNames);
Query query = queryWrapper.query = pm.newQuery(MTableColumnStatistics.class);
- String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
- String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
- Object[] params = new Object[colNames.size() + 3];
- params[0] = table.getTableName();
- params[1] = table.getDbName();
- params[2] = table.getCatName();
- for (int i = 0; i < colNames.size(); ++i) {
- filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
- paramStr += ", java.lang.String c" + i;
- params[i + 3] = colNames.get(i);
- }
- filter += ")";
- query.setFilter(filter);
- query.declareParameters(paramStr);
- result = (List<MTableColumnStatistics>) query.executeWithArray(params);
- pm.retrieveAll(result);
+ List<MTableColumnStatistics> result =
+ Batchable.runBatched(batchSize, colNames, new Batchable<String, MTableColumnStatistics>() {
+ @Override
+ public List<MTableColumnStatistics> run(List<String> input)
+ throws MetaException {
+ String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
+ String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+ Object[] params = new Object[input.size() + 3];
+ params[0] = table.getTableName();
+ params[1] = table.getDbName();
+ params[2] = table.getCatName();
+ for (int i = 0; i < input.size(); ++i) {
+ filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
+ paramStr += ", java.lang.String c" + i;
+ params[i + 3] = input.get(i);
+ }
+ filter += ")";
+ query.setFilter(filter);
+ query.declareParameters(paramStr);
+ List<MTableColumnStatistics> paritial = (List<MTableColumnStatistics>) query.executeWithArray(params);
+ pm.retrieveAll(paritial);
+ return paritial;
+ }
+ });
+
if (result.size() > colNames.size()) {
throw new MetaException("Unexpected " + result.size() + " statistics for "
+ colNames.size() + " columns");
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 552eeca..35aa40c 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -856,6 +856,11 @@ public class MetastoreConf {
"hive.metastore.wm.default.pool.size", 4,
"The size of a default pool to create when creating an empty resource plan;\n" +
"If not positive, no default pool will be created."),
+ RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size",
+ "metastore.rawstore.batch.size", -1,
+ "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" +
+ "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" +
+ "too large. Use this parameter to break the query into multiple batches. -1 means no batching."),
// Hive values we have copied and use as is
// These two are used to indicate that we are running tests