You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2017/12/04 08:47:34 UTC

[3/3] hive git commit: HIVE-17620: Use the default MR scratch directory (HDFS) in the only case when hive.blobstore.optimizations.enabled=true AND isFinalJob=true (Gergely Hajós reviewed by Rajesh Balamohan via Zoltan Haindrich)

HIVE-17620: Use the default MR scratch directory (HDFS) in the only case when hive.blobstore.optimizations.enabled=true AND isFinalJob=true (Gergely Hajós reviewed by Rajesh Balamohan via Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2a89e83c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2a89e83c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2a89e83c

Branch: refs/heads/master
Commit: 2a89e83c2f9c7160617618910c99178bb5ef7bc5
Parents: 266d4bb
Author: Gergely Hajós <ro...@gmail.com>
Authored: Mon Dec 4 09:39:56 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Mon Dec 4 09:41:42 2017 +0100

----------------------------------------------------------------------
 .../write_final_output_blobstore.q.out          |  8 +++----
 .../java/org/apache/hadoop/hive/ql/Context.java | 23 ++++++++++----------
 .../hive/ql/optimizer/GenMapRedUtils.java       |  2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 10 ++++-----
 .../apache/hadoop/hive/ql/exec/TestContext.java | 13 +++++++----
 5 files changed, 30 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
index 45e6d25..3d53292 100644
--- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
+++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
@@ -184,10 +184,10 @@ STAGE PLANS:
           File Output Operator
             compressed: false
             GlobalTableId: 1
-#### A masked pattern was here ####
+            directory: ### BLOBSTORE_STAGING_PATH ###
             NumFilesPerFileSink: 1
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-#### A masked pattern was here ####
+            Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -219,7 +219,7 @@ STAGE PLANS:
     Move Operator
       tables:
           replace: true
-#### A masked pattern was here ####
+          source: ### BLOBSTORE_STAGING_PATH ###
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -247,7 +247,7 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
-#### A masked pattern was here ####
+          Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
 
 PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 97b52b0..57e1803 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -509,20 +509,19 @@ public class Context {
    *
    * @return A path to the new temporary directory
    */
-  public Path getTempDirForPath(Path path, boolean isFinalJob) {
-    if (((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(
-            conf)) || isPathLocal(path))) {
-      if (!(isFinalJob && BlobStorageUtils.areOptimizationsEnabled(conf))) {
-        // For better write performance, we use HDFS for temporary data when object store is used.
-        // Note that the scratch directory configuration variable must use HDFS or any other non-blobstorage system
-        // to take advantage of this performance.
-        return getMRTmpPath();
-      }
+  public Path getTempDirForInterimJobPath(Path path) {
+    // For better write performance, we use HDFS for temporary data when object store is used.
+    // Note that the scratch directory configuration variable must use HDFS or any other
+    // non-blobstorage system to take advantage of this performance.
+    boolean isBlobstorageOptimized = BlobStorageUtils.isBlobStoragePath(conf, path)
+        && !BlobStorageUtils.isBlobStorageAsScratchDir(conf) && BlobStorageUtils.areOptimizationsEnabled(conf);
+
+    if (isPathLocal(path) || isBlobstorageOptimized) {
+      return getMRTmpPath();
     }
     return getExtTmpPathRelTo(path);
   }
 
-
   /**
    * Create a temporary directory depending of the path specified.
    * - If path is an Object store filesystem, then use the default MR scratch directory (HDFS)
@@ -531,8 +530,8 @@ public class Context {
    * @param path Path used to verify the Filesystem to use for temporary directory
    * @return A path to the new temporary directory
    */
-  public Path getTempDirForPath(Path path) {
-    return getTempDirForPath(path, false);
+  public Path getTempDirForFinalJobPath(Path path) {
+    return getExtTmpPathRelTo(path);
   }
 
   /*

http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index be1d4b8..bdaf105 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1993,7 +1993,7 @@ public final class GenMapRedUtils {
 
         // Create the required temporary file in the HDFS location if the destination
         // path of the FileSinkOperator table is a blobstore path.
-        Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true);
+        Path tmpDir = baseCtx.getTempDirForFinalJobPath(fileSinkDesc.getDestPath());
 
         // Change all the linked file sink descriptors
         if (fileSinkDesc.isLinkedFileSink()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5dd3583..f6bbac6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6827,7 +6827,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       if (isNonNativeTable || isMmTable) {
         queryTmpdir = dest_path;
       } else {
-        queryTmpdir = ctx.getTempDirForPath(dest_path, true);
+        queryTmpdir = ctx.getTempDirForFinalJobPath(dest_path);
       }
       if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
         Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir
@@ -6914,7 +6914,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           .getAuthority(), partPath.toUri().getPath());
 
       isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
-      queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForPath(dest_path, true);
+      queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForFinalJobPath(dest_path);
       if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
         Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying "
             + queryTmpdir + " from " + dest_path);
@@ -7001,7 +7001,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         // no copy is required. we may want to revisit this policy in future
         try {
           Path qPath = FileUtils.makeQualified(dest_path, conf);
-          queryTmpdir = isMmTable ? qPath : ctx.getTempDirForPath(qPath, true);
+          queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
           if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
             Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir
                   + " from " + dest_path + " (" + isMmTable + ")");
@@ -7299,7 +7299,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
     if (!destTableIsMaterialization &&
             HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-      String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
+      String statsTmpLoc = ctx.getTempDirForInterimJobPath(dest_path).toString();
       fileSinkDesc.setStatsTmpDir(statsTmpLoc);
       LOG.debug("Set stats collection dir : " + statsTmpLoc);
     }
@@ -10667,7 +10667,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
-        String statsTmpLoc = ctx.getTempDirForPath(tab.getPath()).toString();
+        String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
         LOG.debug("Set stats collection dir : " + statsTmpLoc);
         tsDesc.setTmpStatsDir(statsTmpLoc);
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
index 808cb94..b442d69 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
@@ -52,17 +52,22 @@ public class TestContext {
         // directory on the default scratch diretory location (usually /temp)
         Path mrTmpPath = new Path("hdfs://hostname/tmp/scratch");
         doReturn(mrTmpPath).when(spyContext).getMRTmpPath();
-        assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("s3a://bucket/dir")));
+        assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));
 
         // When local filesystem paths are used, then getMRTmpPatch() should be called to
         // get a temporary directory
-        assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("file:/user")));
-        assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("file:///user")));
+        assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:/user")));
+        assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));
 
         // When Non-Object store paths are used, then getExtTmpPathRelTo is called to get a temporary
         // directory on the same path passed as a parameter
         Path tmpPathRelTo = new Path("hdfs://hostname/user");
         doReturn(tmpPathRelTo).when(spyContext).getExtTmpPathRelTo(any(Path.class));
-        assertEquals(tmpPathRelTo, spyContext.getTempDirForPath(new Path("/user")));
+        assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("/user")));
+
+        conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, false);
+        assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));
+        assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));
+        conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, true);
     }
 }