Posted to commits@hive.apache.org by ai...@apache.org on 2018/04/25 23:14:30 UTC

[1/2] hive git commit: HIVE-19204: Detailed errors from some tasks are not displayed to the client because the tasks don't set the exception when they fail (Aihua Xu, reviewed by Sahil Takiar)

Repository: hive
Updated Branches:
  refs/heads/master f94ae7fec -> f30efbebf


HIVE-19204: Detailed errors from some tasks are not displayed to the client because the tasks don't set the exception when they fail (Aihua Xu, reviewed by Sahil Takiar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/11b0d857
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/11b0d857
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/11b0d857

Branch: refs/heads/master
Commit: 11b0d85786cd58469d5662c3027e9389cff07710
Parents: f94ae7f
Author: Aihua Xu <ai...@apache.org>
Authored: Mon Apr 16 10:36:02 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Apr 25 16:09:42 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/Driver.java  |  6 ++++-
 .../hive/ql/exec/ColumnStatsUpdateTask.java     |  1 +
 .../hive/ql/exec/ExplainSQRewriteTask.java      |  8 +++---
 .../apache/hadoop/hive/ql/exec/ExplainTask.java |  5 ++--
 .../hive/ql/exec/MaterializedViewTask.java      |  1 +
 .../hadoop/hive/ql/exec/ReplCopyTask.java       |  4 +--
 .../apache/hadoop/hive/ql/exec/StatsTask.java   |  1 +
 .../hadoop/hive/ql/exec/mr/ExecDriver.java      |  4 +--
 .../io/rcfile/truncate/ColumnTruncateTask.java  | 26 +++++++-------------
 .../ql/reexec/ReExecutionOverlayPlugin.java     |  2 +-
 10 files changed, 29 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
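
A note on the pattern: every task touched by this patch records its failure
via Task.setException() before returning a non-zero exit code, so that
Driver can include the root cause in the error reported to the client. A
minimal sketch of the pattern (ExampleTask, ExampleWork and doWork() are
illustrative names, not part of this commit):

    public class ExampleTask extends Task<ExampleWork> {
      @Override
      public int execute(DriverContext driverContext) {
        try {
          doWork();                     // hypothetical task body
          return 0;
        } catch (Exception e) {
          setException(e);              // lets Driver surface the root cause
          LOG.error("Task failed", e);  // keep the full trace in the log
          return 1;                     // non-zero exit marks the task failed
        }
      }
    }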


http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index 4e8dbe2..f83bdaf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -2389,7 +2389,11 @@ public class Driver implements IDriver {
     if(downstreamError != null) {
       //here we assume that upstream code may have parametrized the msg from ErrorMsg
       //so we want to keep it
-      errorMessage += ". " + downstreamError.getMessage();
+      if (downstreamError.getMessage() != null) {
+        errorMessage += ". " + downstreamError.getMessage();
+      } else {
+        errorMessage += ". " + org.apache.hadoop.util.StringUtils.stringifyException(downstreamError);
+      }
     }
     else {
       ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
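
Why the null check: a throwable constructed without a detail message returns
null from getMessage(), so the client previously saw an unhelpful ". null"
suffix. An illustrative snippet (not from the commit) of the fallback:

    Throwable downstreamError = new NullPointerException(); // message is null
    String detail = (downstreamError.getMessage() != null)
        ? downstreamError.getMessage()  // keep the parametrized message
        : org.apache.hadoop.util.StringUtils.stringifyException(downstreamError); // full stack trace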

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 207b66f..a53ff5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -300,6 +300,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
       Hive db = getHive();
       return persistColumnStats(db);
     } catch (Exception e) {
+      setException(e);
       LOG.info("Failed to persist stats in metastore", e);
     }
     return 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
index 80d54bf..1f9e9aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainSQRewriteTask.java
@@ -38,11 +38,13 @@ import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic;
 import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 
 public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements Serializable {
   private static final long serialVersionUID = 1L;
+  private final Logger LOG = LoggerFactory.getLogger(this.getClass().getName());
 
   @Override
   public StageType getType() {
@@ -76,8 +78,8 @@ public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements
       return (0);
     }
     catch (Exception e) {
-      console.printError("Failed with exception " + e.getMessage(),
-          "\n" + StringUtils.stringifyException(e));
+      setException(e);
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       return (1);
     }
     finally {

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 0b30721..34da025 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -65,7 +65,6 @@ import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.common.util.AnnotationUtils;
 import org.json.JSONArray;
 import org.json.JSONException;
@@ -383,8 +382,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
       return (0);
     }
     catch (Exception e) {
-      console.printError("Failed with exception " + e.getMessage(),
-          "\n" + StringUtils.stringifyException(e));
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      setException(e);
       return (1);
     }
     finally {

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
index 834df84..19aef6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
@@ -76,6 +76,7 @@ public class MaterializedViewTask extends Task<MaterializedViewDesc> implements
       }
     } catch (HiveException e) {
       LOG.debug("Exception during materialized view cache update", e);
+      setException(e);
     }
     return 0;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
index 1cad579..de270cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ReplCopyTask.java
@@ -163,8 +163,8 @@ public class ReplCopyTask extends Task<ReplCopyWork> implements Serializable {
       }
       return 0;
     } catch (Exception e) {
-      console.printError("Failed with exception " + e.getMessage(), "\n"
-          + StringUtils.stringifyException(e));
+      LOG.error(StringUtils.stringifyException(e));
+      setException(e);
       return (1);
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
index 00eb7de..7a4242a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
@@ -112,6 +112,7 @@ public class StatsTask extends Task<StatsWork> implements Serializable {
       }
     } catch (Exception e) {
       LOG.error("Failed to run stats task", e);
+      setException(e);
       return 1;
     }
     return 0;

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index 7ff8ddc..1de782a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -465,9 +465,9 @@ public class ExecDriver extends Task<MapredWork> implements Serializable, Hadoop
           jc.close();
         }
       } catch (Exception e) {
-	LOG.warn("Failed while cleaning up ", e);
+        LOG.warn("Failed while cleaning up ", e);
       } finally {
-	HadoopJobExecHelper.runningJobs.remove(rj);
+        HadoopJobExecHelper.runningJobs.remove(rj);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
index 434c3a8..8f21f7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/rcfile/truncate/ColumnTruncateTask.java
@@ -94,9 +94,8 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
         ctxCreated = true;
       }
     }catch (IOException e) {
-      e.printStackTrace();
-      console.printError("Error launching map-reduce job", "\n"
-          + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
+      setException(e);
       return 5;
     }
 
@@ -136,7 +135,8 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
         fs.mkdirs(tempOutPath);
       }
     } catch (IOException e) {
-      console.printError("Can't make path " + outputPath + " : " + e.getMessage());
+      setException(e);
+      LOG.error("Can't make path " + outputPath, e);
       return 6;
     }
 
@@ -191,19 +191,11 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
       success = (returnVal == 0);
 
     } catch (Exception e) {
-      e.printStackTrace();
-      setException(e);
-      String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
-      if (rj != null) {
-        mesg = "Ended Job = " + rj.getJobID() + mesg;
-      } else {
-        mesg = "Job Submission failed" + mesg;
-      }
-
+      String mesg = rj != null ? ("Ended Job = " + rj.getJobID()) : "Job Submission failed";
       // Has to use full name to make sure it does not conflict with
       // org.apache.commons.lang.StringUtils
-      console.printError(mesg, "\n"
-          + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      LOG.error(mesg + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      setException(e);
 
       success = false;
       returnVal = 1;
@@ -220,9 +212,9 @@ public class ColumnTruncateTask extends Task<ColumnTruncateWork> implements Seri
         ColumnTruncateMapper.jobClose(outputPath, success, job, console,
           work.getDynPartCtx(), null);
       } catch (Exception e) {
-	LOG.warn("Failed while cleaning up ", e);
+        LOG.warn("Failed while cleaning up ", e);
       } finally {
-	HadoopJobExecHelper.runningJobs.remove(rj);
+        HadoopJobExecHelper.runningJobs.remove(rj);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/11b0d857/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
index 950903c..50803cc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/reexec/ReExecutionOverlayPlugin.java
@@ -42,7 +42,7 @@ public class ReExecutionOverlayPlugin implements IReExecutionPlugin {
       if (hookContext.getHookType() == HookType.ON_FAILURE_HOOK) {
         Throwable exception = hookContext.getException();
         if (exception != null) {
-          if (exception.getMessage().contains("Vertex failed,")) {
+          if (exception.getMessage() != null && exception.getMessage().contains("Vertex failed,")) {
             retryPossible = true;
           }
         }
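
The same null-message hazard applies here: calling contains() on a null
getMessage() would throw an NPE inside the failure hook itself. The
null-safe form, shown standalone for illustration:

    boolean retryPossible = exception != null
        && exception.getMessage() != null
        && exception.getMessage().contains("Vertex failed,");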


[2/2] hive git commit: HIVE-18986: Table rename will run into a java.lang.StackOverflowError in DataNucleus if the table contains a large number of columns (Aihua Xu, reviewed by Yongzhi Chen)

Posted by ai...@apache.org.
HIVE-18986: Table rename will run into a java.lang.StackOverflowError in DataNucleus if the table contains a large number of columns (Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f30efbeb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f30efbeb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f30efbeb

Branch: refs/heads/master
Commit: f30efbebf2ff85c55a5d9e3e2f86e0a51341df78
Parents: 11b0d85
Author: Aihua Xu <ai...@apache.org>
Authored: Wed Apr 18 17:05:08 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Apr 25 16:10:30 2018 -0700

----------------------------------------------------------------------
 .../queries/clientpositive/alter_rename_table.q | 12 ++-
 .../clientpositive/alter_rename_table.q.out     | 88 ++++++++++++++++++++
 .../apache/hadoop/hive/metastore/Batchable.java | 86 +++++++++++++++++++
 .../hive/metastore/MetaStoreDirectSql.java      | 61 ++------------
 .../hadoop/hive/metastore/ObjectStore.java      | 45 ++++++----
 .../hive/metastore/conf/MetastoreConf.java      |  5 ++
 6 files changed, 227 insertions(+), 70 deletions(-)
----------------------------------------------------------------------
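
For context: ObjectStore builds a JDO filter with one OR term per column
name, and DataNucleus compiles that expression recursively, so a table with
thousands of columns can overflow the stack during a rename. A sketch of the
filter shape that grows with the column count (see the ObjectStore diff
below for the real code):

    // Each column adds another "|| colName == cN" term; with thousands of
    // columns the recursive query compilation in DataNucleus can overflow
    // the stack. Batching caps the number of terms per query.
    StringBuilder filter = new StringBuilder("tableName == t1 && dbName == t2 && catName == t3 && (");
    for (int i = 0; i < colNames.size(); i++) {
      filter.append(i == 0 ? "" : " || ").append("colName == c").append(i);
    }
    filter.append(")");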


http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/queries/clientpositive/alter_rename_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/alter_rename_table.q b/ql/src/test/queries/clientpositive/alter_rename_table.q
index 53fb230..bcf6ad5 100644
--- a/ql/src/test/queries/clientpositive/alter_rename_table.q
+++ b/ql/src/test/queries/clientpositive/alter_rename_table.q
@@ -36,4 +36,14 @@ create table source.src1 like default.src;
 load data local inpath '../../data/files/kv1.txt' overwrite into table source.src;
 
 ALTER TABLE source.src RENAME TO target.src1;
-select * from target.src1 tablesample (10 rows);
\ No newline at end of file
+select * from target.src1 tablesample (10 rows);
+
+set metastore.rawstore.batch.size=1;
+set metastore.try.direct.sql=false;
+
+create table source.src2 like default.src;
+load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2;
+ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS;
+ALTER TABLE source.src2 RENAME TO target.src3;
+DESC FORMATTED target.src3;
+select * from target.src3 tablesample (10 rows);

http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/ql/src/test/results/clientpositive/alter_rename_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_rename_table.q.out b/ql/src/test/results/clientpositive/alter_rename_table.q.out
index 732d8a2..9ac8fd2 100644
--- a/ql/src/test/results/clientpositive/alter_rename_table.q.out
+++ b/ql/src/test/results/clientpositive/alter_rename_table.q.out
@@ -261,3 +261,91 @@ POSTHOOK: Input: target@src1
 278	val_278
 98	val_98
 484	val_484
+PREHOOK: query: create table source.src2 like default.src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:source
+PREHOOK: Output: source@src2
+POSTHOOK: query: create table source.src2 like default.src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:source
+POSTHOOK: Output: source@src2
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table source.src2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS
+PREHOOK: type: QUERY
+PREHOOK: Input: source@src2
+#### A masked pattern was here ####
+PREHOOK: Output: source@src2
+POSTHOOK: query: ANALYZE TABLE source.src2 COMPUTE STATISTICS FOR COLUMNS
+POSTHOOK: type: QUERY
+POSTHOOK: Input: source@src2
+#### A masked pattern was here ####
+POSTHOOK: Output: source@src2
+PREHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+PREHOOK: type: ALTERTABLE_RENAME
+PREHOOK: Input: source@src2
+PREHOOK: Output: source@src2
+POSTHOOK: query: ALTER TABLE source.src2 RENAME TO target.src3
+POSTHOOK: type: ALTERTABLE_RENAME
+POSTHOOK: Input: source@src2
+POSTHOOK: Output: source@src2
+POSTHOOK: Output: target@src3
+PREHOOK: query: DESC FORMATTED target.src3
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: target@src3
+POSTHOOK: query: DESC FORMATTED target.src3
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: target@src3
+# col_name            	data_type           	comment             
+key                 	string              	default             
+value               	string              	default             
+	 	 
+# Detailed Table Information	 	 
+Database:           	target              	 
+#### A masked pattern was here ####
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: select * from target.src3 tablesample (10 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: target@src3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target.src3 tablesample (10 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: target@src3
+#### A masked pattern was here ####
+238	val_238
+86	val_86
+311	val_311
+27	val_27
+165	val_165
+409	val_409
+255	val_255
+278	val_278
+98	val_98
+484	val_484

http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
new file mode 100644
index 0000000..7e488a5
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Batchable.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.List;
+import javax.jdo.Query;
+
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ *  Base class that adds batch processing for DirectSQL or RawStore queries.
+ *  1. Provide the implementation of run() to process one batch
+ *  2. Call Batchable.runBatched() to process the whole dataset
+ *
+ *  I: input type, R: result type
+ */
+public abstract class Batchable<I, R> {
+  private static final Logger LOG = LoggerFactory.getLogger(Batchable.class);
+  public static final int NO_BATCHING = -1;
+
+  private List<Query> queries = null;
+  public abstract List<R> run(List<I> input) throws MetaException;
+
+  public void addQueryAfterUse(Query query) {
+    if (queries == null) {
+      queries = new ArrayList<Query>(1);
+    }
+    queries.add(query);
+  }
+  protected void addQueryAfterUse(Batchable<?, ?> b) {
+    if (b.queries == null) {
+      return;
+    }
+    if (queries == null) {
+      queries = new ArrayList<Query>(1);
+    }
+    queries.addAll(b.queries);
+  }
+  public void closeAllQueries() {
+    for (Query q : queries) {
+      try {
+        q.closeAll();
+      } catch (Throwable t) {
+        LOG.error("Failed to close a query", t);
+      }
+    }
+  }
+
+  public static <I, R> List<R> runBatched(
+      final int batchSize,
+      List<I> input,
+      Batchable<I, R> runnable) throws MetaException {
+    if (batchSize == NO_BATCHING || batchSize >= input.size()) {
+      return runnable.run(input);
+    }
+    List<R> result = new ArrayList<R>(input.size());
+    for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
+      toIndex = Math.min(fromIndex + batchSize, input.size());
+      List<I> batchedInput = input.subList(fromIndex, toIndex);
+      List<R> batchedOutput = runnable.run(batchedInput);
+      if (batchedOutput != null) {
+        result.addAll(batchedOutput);
+      }
+    }
+    return result;
+  }
+}
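
A hypothetical caller, to sketch how runBatched() splits its input
(fetchPartitions() is an illustrative stand-in; the real call sites are in
the MetaStoreDirectSql and ObjectStore diffs below):

    List<Partition> parts = Batchable.runBatched(300, partNames,
        new Batchable<String, Partition>() {
          @Override
          public List<Partition> run(List<String> batch) throws MetaException {
            return fetchPartitions(batch); // hypothetical per-batch DB query
          }
        });
    // With batchSize = 300 and 1000 names, run() sees batches of 300, 300,
    // 300 and 100; non-null results are concatenated in input order.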

http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 997f5fd..4e0e887 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -455,7 +455,7 @@ class MetaStoreDirectSql {
     if (partNames.isEmpty()) {
       return Collections.emptyList();
     }
-    return runBatched(partNames, new Batchable<String, Partition>() {
+    return Batchable.runBatched(batchSize, partNames, new Batchable<String, Partition>() {
       @Override
       public List<Partition> run(List<String> input) throws MetaException {
         String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
@@ -596,7 +596,7 @@ class MetaStoreDirectSql {
     }
 
     // Get full objects. For Oracle/etc. do it in batches.
-    List<Partition> result = runBatched(sqlResult, new Batchable<Object, Partition>() {
+    List<Partition> result = Batchable.runBatched(batchSize, sqlResult, new Batchable<Object, Partition>() {
       @Override
       public List<Partition> run(List<Object> input) throws MetaException {
         return getPartitionsFromPartitionIds(catNameLcase, dbNameLcase, tblNameLcase, isView,
@@ -1374,7 +1374,7 @@ class MetaStoreDirectSql {
         return ensureList(qResult);
       }
     };
-    List<Object[]> list = runBatched(colNames, b);
+    List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
     if (list.isEmpty()) {
       return null;
     }
@@ -1460,10 +1460,10 @@ class MetaStoreDirectSql {
         + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
         + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
         + " group by \"PARTITION_NAME\"";
-    List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+    List<Long> allCounts = Batchable.runBatched(batchSize, colNames, new Batchable<String, Long>() {
       @Override
       public List<Long> run(final List<String> inputColName) throws MetaException {
-        return runBatched(partNames, new Batchable<String, Long>() {
+        return Batchable.runBatched(batchSize, partNames, new Batchable<String, Long>() {
           @Override
           public List<Long> run(List<String> inputPartNames) throws MetaException {
             long partsFound = 0;
@@ -1503,10 +1503,10 @@ class MetaStoreDirectSql {
     final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
     final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
     final boolean areAllPartsFound = (partsFound == partNames.size());
-    return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+    return Batchable.runBatched(batchSize, colNames, new Batchable<String, ColumnStatisticsObj>() {
       @Override
       public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
-        return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+        return Batchable.runBatched(batchSize, partNames, new Batchable<String, ColumnStatisticsObj>() {
           @Override
           public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
             return columnStatisticsObjForPartitionsBatch(catName, dbName, tableName, inputPartNames,
@@ -1918,13 +1918,13 @@ class MetaStoreDirectSql {
           }
         };
         try {
-          return runBatched(partNames, b2);
+          return Batchable.runBatched(batchSize, partNames, b2);
         } finally {
           addQueryAfterUse(b2);
         }
       }
     };
-    List<Object[]> list = runBatched(colNames, b);
+    List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
 
     List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(
         Math.min(list.size(), partNames.size()));
@@ -2027,49 +2027,6 @@ class MetaStoreDirectSql {
   }
 
 
-  private static abstract class Batchable<I, R> {
-    private List<Query> queries = null;
-    public abstract List<R> run(List<I> input) throws MetaException;
-    public void addQueryAfterUse(Query query) {
-      if (queries == null) {
-        queries = new ArrayList<Query>(1);
-      }
-      queries.add(query);
-    }
-    protected void addQueryAfterUse(Batchable<?, ?> b) {
-      if (b.queries == null) return;
-      if (queries == null) {
-        queries = new ArrayList<Query>(1);
-      }
-      queries.addAll(b.queries);
-    }
-    public void closeAllQueries() {
-      for (Query q : queries) {
-        try {
-          q.closeAll();
-        } catch (Throwable t) {
-          LOG.error("Failed to close a query", t);
-        }
-      }
-    }
-  }
-
-  private <I,R> List<R> runBatched(List<I> input, Batchable<I, R> runnable) throws MetaException {
-    if (batchSize == NO_BATCHING || batchSize >= input.size()) {
-      return runnable.run(input);
-    }
-    List<R> result = new ArrayList<R>(input.size());
-    for (int fromIndex = 0, toIndex = 0; toIndex < input.size(); fromIndex = toIndex) {
-      toIndex = Math.min(fromIndex + batchSize, input.size());
-      List<I> batchedInput = input.subList(fromIndex, toIndex);
-      List<R> batchedOutput = runnable.run(batchedInput);
-      if (batchedOutput != null) {
-        result.addAll(batchedOutput);
-      }
-    }
-    return result;
-  }
-
   public List<SQLForeignKey> getForeignKeys(String catName, String parent_db_name,
                                             String parent_tbl_name, String foreign_db_name,
                                             String foreign_tbl_name) throws MetaException {

http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 184ecb6..1abd99d 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -244,6 +244,7 @@ public class ObjectStore implements RawStore, Configurable {
   private static Properties prop = null;
   private static PersistenceManagerFactory pmf = null;
   private static boolean forTwoMetastoreTesting = false;
+  private int batchSize = Batchable.NO_BATCHING;
 
   private static final DateTimeFormatter YMDHMS_FORMAT = DateTimeFormatter.ofPattern(
       "yyyy_MM_dd_HH_mm_ss");
@@ -385,6 +386,8 @@ public class ObjectStore implements RawStore, Configurable {
         directSqlErrors = Metrics.getOrCreateCounter(MetricsConstants.DIRECTSQL_ERRORS);
       }
 
+      this.batchSize = MetastoreConf.getIntVar(conf, ConfVars.RAWSTORE_PARTITION_BATCH_SIZE);
+
       if (!isInitialized) {
         throw new RuntimeException(
         "Unable to create persistence manager. Check dss.log for details");
@@ -8028,25 +8031,33 @@ public class ObjectStore implements RawStore, Configurable {
     try {
       openTransaction();
 
-      List<MTableColumnStatistics> result = null;
       validateTableCols(table, colNames);
       Query query = queryWrapper.query = pm.newQuery(MTableColumnStatistics.class);
-      String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
-      String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
-      Object[] params = new Object[colNames.size() + 3];
-      params[0] = table.getTableName();
-      params[1] = table.getDbName();
-      params[2] = table.getCatName();
-      for (int i = 0; i < colNames.size(); ++i) {
-        filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
-        paramStr += ", java.lang.String c" + i;
-        params[i + 3] = colNames.get(i);
-      }
-      filter += ")";
-      query.setFilter(filter);
-      query.declareParameters(paramStr);
-      result = (List<MTableColumnStatistics>) query.executeWithArray(params);
-      pm.retrieveAll(result);
+      List<MTableColumnStatistics> result =
+          Batchable.runBatched(batchSize, colNames, new Batchable<String, MTableColumnStatistics>() {
+            @Override
+            public List<MTableColumnStatistics> run(List<String> input)
+                throws MetaException {
+              String filter = "tableName == t1 && dbName == t2 && catName == t3 && (";
+              String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3";
+              Object[] params = new Object[input.size() + 3];
+              params[0] = table.getTableName();
+              params[1] = table.getDbName();
+              params[2] = table.getCatName();
+              for (int i = 0; i < input.size(); ++i) {
+                filter += ((i == 0) ? "" : " || ") + "colName == c" + i;
+                paramStr += ", java.lang.String c" + i;
+                params[i + 3] = input.get(i);
+              }
+              filter += ")";
+              query.setFilter(filter);
+              query.declareParameters(paramStr);
+              List<MTableColumnStatistics> partial = (List<MTableColumnStatistics>) query.executeWithArray(params);
+              pm.retrieveAll(partial);
+              return partial;
+            }
+          });
+
       if (result.size() > colNames.size()) {
         throw new MetaException("Unexpected " + result.size() + " statistics for "
             + colNames.size() + " columns");

http://git-wip-us.apache.org/repos/asf/hive/blob/f30efbeb/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 552eeca..35aa40c 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -856,6 +856,11 @@ public class MetastoreConf {
         "hive.metastore.wm.default.pool.size", 4,
         "The size of a default pool to create when creating an empty resource plan;\n" +
         "If not positive, no default pool will be created."),
+    RAWSTORE_PARTITION_BATCH_SIZE("metastore.rawstore.batch.size",
+        "metastore.rawstore.batch.size", -1,
+        "Batch size for partition and other object retrieval from the underlying DB in JDO.\n" +
+        "The JDO implementation such as DataNucleus may run into issues when the generated queries are\n" +
+        "too large. Use this parameter to break the query into multiple batches. -1 means no batching."),
 
     // Hive values we have copied and use as is
     // These two are used to indicate that we are running tests
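
As the new qfile test above shows, the batch size can be toggled per session
(set metastore.rawstore.batch.size=1); the metastore reads it once in
ObjectStore.setConf(), per the diff above:

    // -1 (Batchable.NO_BATCHING) disables batching entirely.
    int batchSize = MetastoreConf.getIntVar(conf, ConfVars.RAWSTORE_PARTITION_BATCH_SIZE);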