You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2017/12/04 08:47:34 UTC
[3/3] hive git commit: HIVE-17620: Use the default MR scratch directory (HDFS) in the only case when hive.blobstore.optimizations.enabled=true AND isFinalJob=true (Gergely Hajós reviewed by Rajesh Balamohan via Zoltan Haindrich)
HIVE-17620: Use the default MR scratch directory (HDFS) in the only case when hive.blobstore.optimizations.enabled=true AND isFinalJob=true (Gergely Hajós reviewed by Rajesh Balamohan via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2a89e83c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2a89e83c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2a89e83c
Branch: refs/heads/master
Commit: 2a89e83c2f9c7160617618910c99178bb5ef7bc5
Parents: 266d4bb
Author: Gergely Hajós <ro...@gmail.com>
Authored: Mon Dec 4 09:39:56 2017 +0100
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Mon Dec 4 09:41:42 2017 +0100
----------------------------------------------------------------------
.../write_final_output_blobstore.q.out | 8 +++----
.../java/org/apache/hadoop/hive/ql/Context.java | 23 ++++++++++----------
.../hive/ql/optimizer/GenMapRedUtils.java | 2 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 ++++-----
.../apache/hadoop/hive/ql/exec/TestContext.java | 13 +++++++----
5 files changed, 30 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
index 45e6d25..3d53292 100644
--- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
+++ b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out
@@ -184,10 +184,10 @@ STAGE PLANS:
File Output Operator
compressed: false
GlobalTableId: 1
-#### A masked pattern was here ####
+ directory: ### BLOBSTORE_STAGING_PATH ###
NumFilesPerFileSink: 1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-#### A masked pattern was here ####
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -219,7 +219,7 @@ STAGE PLANS:
Move Operator
tables:
replace: true
-#### A masked pattern was here ####
+ source: ### BLOBSTORE_STAGING_PATH ###
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -247,7 +247,7 @@ STAGE PLANS:
Stage: Stage-3
Stats Work
Basic Stats Work:
-#### A masked pattern was here ####
+ Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index 97b52b0..57e1803 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -509,20 +509,19 @@ public class Context {
*
* @return A path to the new temporary directory
*/
- public Path getTempDirForPath(Path path, boolean isFinalJob) {
- if (((BlobStorageUtils.isBlobStoragePath(conf, path) && !BlobStorageUtils.isBlobStorageAsScratchDir(
- conf)) || isPathLocal(path))) {
- if (!(isFinalJob && BlobStorageUtils.areOptimizationsEnabled(conf))) {
- // For better write performance, we use HDFS for temporary data when object store is used.
- // Note that the scratch directory configuration variable must use HDFS or any other non-blobstorage system
- // to take advantage of this performance.
- return getMRTmpPath();
- }
+ public Path getTempDirForInterimJobPath(Path path) {
+ // For better write performance, we use HDFS for temporary data when object store is used.
+ // Note that the scratch directory configuration variable must use HDFS or any other
+ // non-blobstorage system to take advantage of this performance.
+ boolean isBlobstorageOptimized = BlobStorageUtils.isBlobStoragePath(conf, path)
+ && !BlobStorageUtils.isBlobStorageAsScratchDir(conf) && BlobStorageUtils.areOptimizationsEnabled(conf);
+
+ if (isPathLocal(path) || isBlobstorageOptimized) {
+ return getMRTmpPath();
}
return getExtTmpPathRelTo(path);
}
-
/**
* Create a temporary directory depending of the path specified.
* - If path is an Object store filesystem, then use the default MR scratch directory (HDFS)
@@ -531,8 +530,8 @@ public class Context {
* @param path Path used to verify the Filesystem to use for temporary directory
* @return A path to the new temporary directory
*/
- public Path getTempDirForPath(Path path) {
- return getTempDirForPath(path, false);
+ public Path getTempDirForFinalJobPath(Path path) {
+ return getExtTmpPathRelTo(path);
}
/*
http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index be1d4b8..bdaf105 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1993,7 +1993,7 @@ public final class GenMapRedUtils {
// Create the required temporary file in the HDFS location if the destination
// path of the FileSinkOperator table is a blobstore path.
- Path tmpDir = baseCtx.getTempDirForPath(fileSinkDesc.getDestPath(), true);
+ Path tmpDir = baseCtx.getTempDirForFinalJobPath(fileSinkDesc.getDestPath());
// Change all the linked file sink descriptors
if (fileSinkDesc.isLinkedFileSink()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5dd3583..f6bbac6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6827,7 +6827,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (isNonNativeTable || isMmTable) {
queryTmpdir = dest_path;
} else {
- queryTmpdir = ctx.getTempDirForPath(dest_path, true);
+ queryTmpdir = ctx.getTempDirForFinalJobPath(dest_path);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir
@@ -6914,7 +6914,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
.getAuthority(), partPath.toUri().getPath());
isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
- queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForPath(dest_path, true);
+ queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForFinalJobPath(dest_path);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying "
+ queryTmpdir + " from " + dest_path);
@@ -7001,7 +7001,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// no copy is required. we may want to revisit this policy in future
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
- queryTmpdir = isMmTable ? qPath : ctx.getTempDirForPath(qPath, true);
+ queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir
+ " from " + dest_path + " (" + isMmTable + ")");
@@ -7299,7 +7299,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
fileSinkDesc.setStatsAggPrefix(fileSinkDesc.getDirName().toString());
if (!destTableIsMaterialization &&
HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
- String statsTmpLoc = ctx.getTempDirForPath(dest_path).toString();
+ String statsTmpLoc = ctx.getTempDirForInterimJobPath(dest_path).toString();
fileSinkDesc.setStatsTmpDir(statsTmpLoc);
LOG.debug("Set stats collection dir : " + statsTmpLoc);
}
@@ -10667,7 +10667,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
tsDesc.setGatherStats(false);
} else {
if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
- String statsTmpLoc = ctx.getTempDirForPath(tab.getPath()).toString();
+ String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
LOG.debug("Set stats collection dir : " + statsTmpLoc);
tsDesc.setTmpStatsDir(statsTmpLoc);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2a89e83c/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
index 808cb94..b442d69 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestContext.java
@@ -52,17 +52,22 @@ public class TestContext {
// directory on the default scratch diretory location (usually /temp)
Path mrTmpPath = new Path("hdfs://hostname/tmp/scratch");
doReturn(mrTmpPath).when(spyContext).getMRTmpPath();
- assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("s3a://bucket/dir")));
+ assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));
// When local filesystem paths are used, then getMRTmpPatch() should be called to
// get a temporary directory
- assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("file:/user")));
- assertEquals(mrTmpPath, spyContext.getTempDirForPath(new Path("file:///user")));
+ assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:/user")));
+ assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));
// When Non-Object store paths are used, then getExtTmpPathRelTo is called to get a temporary
// directory on the same path passed as a parameter
Path tmpPathRelTo = new Path("hdfs://hostname/user");
doReturn(tmpPathRelTo).when(spyContext).getExtTmpPathRelTo(any(Path.class));
- assertEquals(tmpPathRelTo, spyContext.getTempDirForPath(new Path("/user")));
+ assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("/user")));
+
+ conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, false);
+ assertEquals(tmpPathRelTo, spyContext.getTempDirForInterimJobPath(new Path("s3a://bucket/dir")));
+ assertEquals(mrTmpPath, spyContext.getTempDirForInterimJobPath(new Path("file:///user")));
+ conf.setBoolean(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, true);
}
}