You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/04/25 23:14:30 UTC
[1/2] hive git commit: HIVE-19204: Detailed errors from some tasks
are not displayed to the client because the tasks don't set exception when
they fail (Aihua Xu, reviewed by Sahil Takiar)
Repository: hive
Updated Branches:
refs/heads/master f94ae7fec -> f30efbebf
HIVE-19204: Detailed errors from some tasks are not displayed to the client because the tasks don't set exception when they fail (Aihua Xu, reviewed by Sahil Takiar)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/11b0d857
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/11b0d857
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/11b0d857
Branch: refs/heads/master
Commit: 11b0d85786cd58469d5662c3027e9389cff07710
Parents: f94ae7f
Author: Aihua Xu <ai...@apache.org>
Authored: Mon Apr 16 10:36:02 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Apr 25 16:09:42 2018 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/Driver.java | 6 ++++-
.../hive/ql/exec/ColumnStatsUpdateTask.java | 1 +
.../hive/ql/exec/ExplainSQRewriteTask.java | 8 +++---
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 5 ++--
.../hive/ql/exec/MaterializedViewTask.java | 1 +
.../hadoop/hive/ql/exec/ReplCopyTask.java | 4 +--
.../apache/hadoop/hive/ql/exec/StatsTask.java | 1 +
.../hadoop/hive/ql/exec/mr/ExecDriver.java | 4 +--
.../io/rcfile/truncate/ColumnTruncateTask.java | 26 +++++++-------------
.../ql/reexec/ReExecutionOverlayPlugin.java | 2 +-
10 files changed, 29 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 4e8dbe2..f83bdaf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -2389,7 +2389,11 @@ public class Driver implements IDriver {
if(downstreamError != null) {
//here we assume that upstream code may have parametrized the msg from ErrorMsg
//so we want to keep it
- errorMessage += ". " + downstreamError.getMessage();
+ if (downstreamError.getMessage() != null) {
+ errorMessage += ". " + downstreamError.getMessage();
+ } else {
+ errorMessage += ". " + org.apache.hadoop.util.StringUtils.stringifyException(downstreamError);
+ }
}
else {
ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 207b66f..a53ff5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -300,6 +300,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
Hive db = getHive();
return persistColumnStats(db);
} catch (Exception e) {
+ setException(e);
LOG.info("Failed to persist stats in metastore", e);
}
return 1;
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
index 80d54bf..1f9e9aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
@@ -38,11 +38,13 @@ import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic;
import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements Serializable {
private static final long serialVersionUID = 1L;
+ private final Logger LOG = LoggerFactory.getLogger(this.getClass().getName());
@Override
public StageType getType() {
@@ -76,8 +78,8 @@ public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements
return (0);
}
catch (Exception e) {
- console.printError("Failed with exception " + e.getMessage(),
- "\n" + StringUtils.stringifyException(e));
+ setException(e);
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
return (1);
}
finally {
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 0b30721..34da025 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -65,7 +65,6 @@ import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.AnnotationUtils;
import org.json.JSONArray;
import org.json.JSONException;
@@ -383,8 +382,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
return (0);
}
catch (Exception e) {
- console.printError("Failed with exception " + e.getMessage(),
- "\n" + StringUtils.stringifyException(e));
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ setException(e);
return (1);
}
finally {
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
index 834df84..19aef6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
@@ -76,6 +76,7 @@ public class MaterializedViewTask extends Task<MaterializedViewDesc> implements
}
} catch (HiveException e) {
LOG.debug("Exception during materialized view cache update", e);
+ setException(e);
}
return 0;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
index 1cad579..de270cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
@@ -163,8 +163,8 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
}
return 0;
} catch (Exception e) {
- console.printError("Failed with exception " + e.getMessage(), "\n"
- + StringUtils.stringifyException(e));
+ LOG.error(StringUtils.stringifyException(e));
+ setException(e);
return (1);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
index 00eb7de..7a4242a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
@@ -112,6 +112,7 @@ public class StatsTask extends Task<StatsWork> implements Serializable {
}
} catch (Exception e) {
LOG.error("Failed to run stats task", e);
+ setException(e);
return 1;
}
return 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 7ff8ddc..1de782a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -465,9 +465,9 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
jc.close();
}
} catch (Exception e) {
- LOG.warn("Failed while cleaning up ", e);
+ LOG.warn("Failed while cleaning up ", e);
} finally {
- HadoopJobExecHelper.runningJobs.remove(rj);
+ HadoopJobExecHelper.runningJobs.remove(rj);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
index 434c3a8..8f21f7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
@@ -94,9 +94,8 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
ctxCreated = true;
}
}catch (IOException e) {
- e.printStackTrace();
- console.printError("Error launching map-reduce job", "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+ setException(e);
return 5;
}
@@ -136,7 +135,8 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
fs.mkdirs(tempOutPath);
}
} catch (IOException e) {
- console.printError("Can't make path " + outputPath + " : " + e.getMessage());
+ setException(e);
+ LOG.error("Can't make path " + outputPath, e);
return 6;
}
@@ -191,19 +191,11 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
success = (returnVal == 0);
} catch (Exception e) {
- e.printStackTrace();
- setException(e);
- String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
- if (rj != null) {
- mesg = "Ended Job = " + rj.getJobID() + mesg;
- } else {
- mesg = "Job Submission failed" + mesg;
- }
-
+ String mesg = rj != null ? ("Ended Job = " + rj.getJobID()) : "Job Submission failed";
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
- console.printError(mesg, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ LOG.error(mesg, org.apache.hadoop.util.StringUtils.stringifyException(e));
+ setException(e);
success = false;
returnVal = 1;
@@ -220,9 +212,9 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
ColumnTruncateMapper.jobClose(outputPath, success, job, console,
work.getDynPartCtx(), null);
} catch (Exception e) {
- LOG.warn("Failed while cleaning up ", e);
+ LOG.warn("Failed while cleaning up ", e);
} finally {
- HadoopJobExecHelper.runningJobs.remove(rj);
+ HadoopJobExecHelper.runningJobs.remove(rj);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
index 950903c..50803cc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
@@ -42,7 +42,7 @@ public class ReExecutionOverlayPlugin implements IReExecutionPlugin {
if (hookContext.getHookType() == HookType.ON_FAILURE_HOOK) {
Throwable exception = hookContext.getException();
if (exception != null) {
- if (exception.getMessage().contains("Vertex failed,")) {
+ if (exception.getMessage() != null && exception.getMessage().contains("Vertex failed,")) {
retryPossible = true;
}
}
[2/2] hive git commit: HIVE-18986: Table rename will run
java.lang.StackOverflowError in dataNucleus if the table contains large
number of columns (Aihua Xu, reviewed by Yongzhi Chen)
Posted by ai...@apache.org.
HIVE-18986: Table rename will run java.lang.StackOverflowError in dataNucleus if the table contains large number of columns (Aihua Xu, reviewed by Yongzhi Chen)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f30efbeb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f30efbeb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f30efbeb
Branch: refs/heads/master
Commit: f30efbebf2ff85c55a5d9e3e2f86e0a51341df78
Parents: 11b0d85
Author: Aihua Xu <ai...@apache.org>
Authored: Wed Apr 18 17:05:08 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Apr 25 16:10:30 2018 -0700
----------------------------------------------------------------------
.../queries/clientpositive/alter_rename_table.q | 12 ++-
.../clientpositive/alter_rename_table.q.out | 88 ++++++++++++++++++++
.../apache/hadoop/hive/metastore/Batchable.java | 86 +++++++++++++++++++
.../hive/metastore/MetaStoreDirectSql.java | 61 ++------------
.../hadoop/hive/metastore/ObjectStore.java | 45 ++++++----
.../hive/metastore/conf/MetastoreConf.java | 5 ++
6 files changed, 227 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/queries/clientpositive/alter_rename_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_rename_table.q b/ql/src/test/queries/clientpositive/alter_rename_table.q
index 53fb230..bcf6ad5 100644
--- a/ql/src/test/queries/clientpositive/alter_rename_table.q
+++ b/ql/src/test/queries/clientpositive/alter_rename_table.q
@@ -36,4 +36,14 @@ create table source.src1 like default.src;
load data local inpath '../../data/files/kv1.txt' overwrite into table source.src;
ALTER TABLE source.src RENAME TO target.src1;
-select * from target.src1 tablesample (10 rows);
\ No newline at end of file
+select * from target.src1 tablesample (10 rows);
+
+set metastore.rawstore.batch.size=1;
+set metastore.try.direct.sql=false;
+
+create table source.src2 like default.src;
+load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2;
+ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS;
+ALTER TABLE source.src2 RENAME TO target.src3;
+DESC FORMATTED target.src3;
+select * from target.src3 tablesample (10 rows);
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/results/clientpositive/alter_rename_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_rename_table.q.out b/ql/src/test/results/clientpositive/alter_rename_table.q.out
index 732d8a2..9ac8fd2 100644
--- a/ql/src/test/results/clientpositive/alter_rename_table.q.out
+++ b/ql/src/test/results/clientpositive/alter_rename_table.q.out
@@ -261,3 +261,91 @@ POSTHOOK: Input: target@src1
278 val_278
98 val_98
484 val_484
+PREHOOK: query: create table source.src2 like default.src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:source
+PREHOOK: Output: source@src2
+POSTHOOK: query: create table source.src2 like default.src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:source
+POSTHOOK: Output: source@src2
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: source@src2
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: ANALYZE TABlE source.src2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: source@src2
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: source@src2
+PREHOOK: Output: source@src2
+POSTHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: source@src2
+POSTHOOK: Output: source@src2
+POSTHOOK: Output: target@src3
+PREHOOK: query: DESC FORMATTED target.src3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: target@src3
+POSTHOOK: query: DESC FORMATTED target.src3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: target@src3
+# col_name data_type comment
+key string default
+value string default
+
+# Detailed Table Information
+Database: target
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+#### A masked pattern was here ####
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from target.src3 tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: target@src3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target.src3 tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: target@src3
+#### A masked pattern was here ####
+238 val_238
+86 val_86
+311 val_311
+27 val_27
+165 val_165
+409 val_409
+255 val_255
+278 val_278
+98 val_98
+484 val_484
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
new file mode 100644
index 0000000..7e488a5
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.List;
+import javax.jdo.Query;
+
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class that adds batch processing to DirectSQL or RawStore queries.
+ * 1. Provide the implementation of run() to process one batch of input.
+ * 2. Call Batchable.runBatched() to process the whole dataset: the input is cut into sublists of
+ *    at most batchSize elements, run() is called per sublist and the results are concatenated.
+ * I: input element type, R: result element type
+ */
+public abstract class Batchable<I, R> {
+ private static final Logger LOG = LoggerFactory.getLogger(Batchable.class);
+ public static final int NO_BATCHING = -1; // batchSize value that makes runBatched() call run() once
+
+ private List<Query> queries = null; // created lazily by addQueryAfterUse()
+ public abstract List<R> run(List<I> input) throws MetaException;
+
+ public void addQueryAfterUse(Query query) { // remembers a query so closeAllQueries() can close it
+ if (queries == null) {
+ queries = new ArrayList<Query>(1);
+ }
+ queries.add(query);
+ }
+ protected void addQueryAfterUse(Batchable<?, ?> b) { // takes over the queries remembered by b
+ if (b.queries == null) {
+ return;
+ }
+ if (queries == null) {
+ queries = new ArrayList<Query>(1);
+ }
+ queries.addAll(b.queries);
+ }
+ public void closeAllQueries() {
+ for (Query q : queries) { // NOTE(review): queries is still null if addQueryAfterUse() was never called
+ try {
+ q.closeAll();
+ } catch (Throwable t) { // log and keep going so the remaining queries are still closed
+ LOG.error("Failed to close a query", t);
+ }
+ }
+ }
+
+ public static <I, R> List<R> runBatched(
+ final int batchSize,
+ List<I> input,
+ Batchable<I, R> runnable) throws MetaException {
+ if (batchSize == NO_BATCHING || batchSize >= input.size()) { // one batch is enough
+ return runnable.run(input); // returned as is, including a null result
+ }
+ List<R> result = new ArrayList<R>(input.size());
+ for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) { // NOTE(review): batchSize == 0 never advances toIndex
+ toIndex = Math.min(fromIndex + batchSize, input.size()); // the last batch may be shorter
+ List<I> batchedInput = input.subList(fromIndex, toIndex);
+ List<R> batchedOutput = runnable.run(batchedInput);
+ if (batchedOutput != null) { // a null batch result is skipped
+ result.addAll(batchedOutput);
+ }
+ }
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 997f5fd..4e0e887 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -455,7 +455,7 @@ class MetaStoreDirectSql {
if (partNames.isEmpty()) {
return Collections.emptyList();
}
- return runBatched(partNames, new Batchable<String, Partition>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, Partition>() {
@Override
public List<Partition> run(List<String> input) throws MetaException {
String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
@@ -596,7 +596,7 @@ class MetaStoreDirectSql {
}
// Get full objects. For Oracle/etc. do it in batches.
- List<Partition> result = runBatched(sqlResult, new Batchable<Object, Partition>() {
+ List<Partition> result = Batchable.runBatched(batchSize, sqlResult, new Batchable<Object, Partition>() {
@Override
public List<Partition> run(List<Object> input) throws MetaException {
return getPartitionsFromPartitionIds(catNameLcase, dbNameLcase, tblNameLcase, isView,
@@ -1374,7 +1374,7 @@ class MetaStoreDirectSql {
return ensureList(qResult);
}
};
- List<Object[]> list = runBatched(colNames, b);
+ List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
if (list.isEmpty()) {
return null;
}
@@ -1460,10 +1460,10 @@ class MetaStoreDirectSql {
+ " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+ " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
+ " group by \"PARTITION_NAME\"";
- List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+ List<Long> allCounts = Batchable.runBatched(batchSize, colNames, new Batchable<String, Long>() {
@Override
public List<Long> run(final List<String> inputColName) throws MetaException {
- return runBatched(partNames, new Batchable<String, Long>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, Long>() {
@Override
public List<Long> run(List<String> inputPartNames) throws MetaException {
long partsFound = 0;
@@ -1503,10 +1503,10 @@ class MetaStoreDirectSql {
final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
final boolean areAllPartsFound = (partsFound == partNames.size());
- return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+ return Batchable.runBatched(batchSize, colNames, new Batchable<String, ColumnStatisticsObj>() {
@Override
public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
- return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+ return Batchable.runBatched(batchSize, partNames, new Batchable<String, ColumnStatisticsObj>() {
@Override
public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames,
@@ -1918,13 +1918,13 @@ class MetaStoreDirectSql {
}
};
try {
- return runBatched(partNames, b2);
+ return Batchable.runBatched(batchSize, partNames, b2);
} finally {
addQueryAfterUse(b2);
}
}
};
- List<Object[]> list = runBatched(colNames, b);
+ List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(
Math.min(list.size(), partNames.size()));
@@ -2027,49 +2027,6 @@ class MetaStoreDirectSql {
}
- private static abstract class Batchable<I, R> {
- private List<Query> queries = null;
- public abstract List<R> run(List<I> input) throws MetaException;
- public void addQueryAfterUse(Query query) {
- if (queries == null) {
- queries = new ArrayList<Query>(1);
- }
- queries.add(query);
- }
- protected void addQueryAfterUse(Batchable<?, ?> b) {
- if (b.queries == null) return;
- if (queries == null) {
- queries = new ArrayList<Query>(1);
- }
- queries.addAll(b.queries);
- }
- public void closeAllQueries() {
- for (Query q : queries) {
- try {
- q.closeAll();
- } catch (Throwable t) {
- LOG.error("Failed to close a query", t);
- }
- }
- }
- }
-
- private <I,R> List<R> runBatched(List<I> input, Batchable<I, R> runnable) throws MetaException {
- if (batchSize == NO_BATCHING || batchSize >= input.size()) {
- return runnable.run(input);
- }
- List<R> result = new ArrayList<R>(input.size());
- for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
- toIndex = Math.min(fromIndex + batchSize, input.size());
- List<I> batchedInput = input.subList(fromIndex, toIndex);
- List<R> batchedOutput = runnable.run(batchedInput);
- if (batchedOutput != null) {
- result.addAll(batchedOutput);
- }
- }
- return result;
- }
-
public List<SQLForeignKey> getForeignKeys(String catName, String parent_db_name,
String parent_tbl_name, String foreign_db_name,
String foreign_tbl_name) throws MetaException {
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 184ecb6..1abd99d 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -244,6 +244,7 @@ public class ObjectStore implements RawStore, Configurable {
private static Properties prop = null;
private static PersistenceManagerFactory pmf = null;
private static boolean forTwoMetastoreTesting = false;
+ private int batchSize = Batchable.NO_BATCHING;
private static final DateTimeFormatter YMDHMS_FORMAT = DateTimeFormatter.ofPattern(
"yyyy_MM_dd_HH_mm_ss");
@@ -385,6 +386,8 @@ public class ObjectStore implements RawStore, Configurable {
directSqlErrors = Metrics.getOrCreateCounter(MetricsConstants.DIRECTSQL_ERRORS);
}
+ this.batchSize = MetastoreConf.getIntVar(conf, ConfVars.RAWSTORE_PARTITION_BATCH_SIZE);
+
if (!isInitialized) {
throw new RuntimeException(
"Unable to create persistence manager. Check dss.log for details");
@@ -8028,25 +8031,33 @@ public class ObjectStore implements RawStore, Configurable {
try {
openTransaction();
- List<MTableColumnStatistics> result = null;
validateTableCols(table, colNames);
Query query = queryWrapper.query = pm.newQuery(MTableColumnStatistics.class);
- String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
- String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
- Object[] params = new Object[colNames.size() + 3];
- params[0] = table.getTableName();
- params[1] = table.getDbName();
- params[2] = table.getCatName();
- for (int i = 0; i < colNames.size(); ++i) {
- filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
- paramStr += ", java.lang.String c" + i;
- params[i + 3] = colNames.get(i);
- }
- filter += ")";
- query.setFilter(filter);
- query.declareParameters(paramStr);
- result = (List<MTableColumnStatistics>) query.executeWithArray(params);
- pm.retrieveAll(result);
+ List<MTableColumnStatistics> result =
+ Batchable.runBatched(batchSize, colNames, new Batchable<String, MTableColumnStatistics>() {
+ @Override
+ public List<MTableColumnStatistics> run(List<String> input)
+ throws MetaException {
+ String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
+ String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+ Object[] params = new Object[input.size() + 3];
+ params[0] = table.getTableName();
+ params[1] = table.getDbName();
+ params[2] = table.getCatName();
+ for (int i = 0; i < input.size(); ++i) {
+ filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
+ paramStr += ", java.lang.String c" + i;
+ params[i + 3] = input.get(i);
+ }
+ filter += ")";
+ query.setFilter(filter);
+ query.declareParameters(paramStr);
+ List<MTableColumnStatistics> paritial = (List<MTableColumnStatistics>) query.executeWithArray(params);
+ pm.retrieveAll(paritial);
+ return paritial;
+ }
+ });
+
if (result.size() > colNames.size()) {
throw new MetaException("Unexpected " + result.size() + " statistics for "
+ colNames.size() + " columns");
http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 552eeca..35aa40c 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -856,6 +856,11 @@ public class MetastoreConf {
"hive.metastore.wm.default.pool.size", 4,
"The size of a default pool to create when creating an empty resource plan;\n" +
"If not positive, no default pool will be created."),
+ RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size",
+ "metastore.rawstore.batch.size", -1,
+ "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" +
+ "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" +
+ "too large. Use this parameter to break the query into multiple batches. -1 means no batching."),
// Hive values we have copied and use as is
// These two are used to indicate that we are running tests