You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2017/04/14 16:01:25 UTC
hive git commit: Support Parquet StatsNoJobTask for Spark & Tez engine (Chao Sun, reviewed by Xuefu Zhang)
Repository: hive
Updated Branches:
refs/heads/master a496e5811 -> f1aae85f1
Support Parquet StatsNoJobTask for Spark & Tez engine (Chao Sun, reviewed by Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f1aae85f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f1aae85f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f1aae85f
Branch: refs/heads/master
Commit: f1aae85f197de09d4b86143f7f13d5aa21d2eb85
Parents: a496e58
Author: Chao Sun <su...@apache.org>
Authored: Thu Apr 13 15:08:58 2017 -0700
Committer: Chao Sun <su...@apache.org>
Committed: Fri Apr 14 09:00:49 2017 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java | 6 ++++--
.../hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java | 5 +++--
2 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f1aae85f/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index 46c24e3..b6d7ee8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Set;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
@@ -94,8 +95,9 @@ public class ProcessAnalyzeTable implements NodeProcessor {
assert alias != null;
TezWork tezWork = context.currentTask.getWork();
- if (inputFormat.equals(OrcInputFormat.class)) {
- // For ORC, all the following statements are the same
+ if (OrcInputFormat.class.isAssignableFrom(inputFormat) ||
+ MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
+ // For ORC & Parquet, all the following statements are the same
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
http://git-wip-us.apache.org/repos/asf/hive/blob/f1aae85f/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
index 52186b4..52af3af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
@@ -22,6 +22,7 @@ import java.util.List;
import java.util.Set;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
@@ -100,8 +101,8 @@ public class SparkProcessAnalyzeTable implements NodeProcessor {
SparkWork sparkWork = context.currentTask.getWork();
boolean partialScan = parseContext.getQueryProperties().isPartialScanAnalyzeCommand();
boolean noScan = parseContext.getQueryProperties().isNoScanAnalyzeCommand();
- if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+ if ((OrcInputFormat.class.isAssignableFrom(inputFormat) ||
+ MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) && (noScan || partialScan)) {
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
// ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
// There will not be any Spark job above this task