You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2015/11/12 21:20:45 UTC
hive git commit: HIVE-7476 : CTAS does not work properly for s3
(Szehon, reviewed by Sergio Pena and Lenni Kuff)
Repository: hive
Updated Branches:
refs/heads/branch-1 48d0ec9ee -> d68f21153
HIVE-7476 : CTAS does not work properly for s3 (Szehon, reviewed by Sergio Pena and Lenni Kuff)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d68f2115
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d68f2115
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d68f2115
Branch: refs/heads/branch-1
Commit: d68f21153e95fd308e82cce029b62d31c3ae05f5
Parents: 48d0ec9
Author: Szehon Ho <sz...@cloudera.com>
Authored: Fri Aug 7 12:01:30 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Thu Nov 12 12:16:15 2015 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/exec/MoveTask.java | 2 +-
.../apache/hadoop/hive/ql/metadata/Hive.java | 77 +++++++++++++-------
2 files changed, 53 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d68f2115/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 9e9d99e..6b8cfd8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -103,7 +103,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INSERT_INTO_MULTILEVEL_DIRS)) {
deletePath = createTargetPath(targetPath, fs);
}
- if (!Hive.moveFile(conf, sourcePath, targetPath, fs, true, false)) {
+ if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) {
try {
if (deletePath != null) {
fs.delete(deletePath, true);
http://git-wip-us.apache.org/repos/asf/hive/blob/d68f2115/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index ff8fe8a..b5bba9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2520,16 +2520,16 @@ private void constructOneLBLocationMap(FileStatus fSta,
return false;
}
- private static boolean isSubDir(Path srcf, Path destf, FileSystem fs, boolean isSrcLocal){
+ private static boolean isSubDir(Path srcf, Path destf, FileSystem srcFs, FileSystem destFs, boolean isSrcLocal) {
if (srcf == null) {
LOG.debug("The source path is null for isSubDir method.");
return false;
}
- String fullF1 = getQualifiedPathWithoutSchemeAndAuthority(srcf, fs);
- String fullF2 = getQualifiedPathWithoutSchemeAndAuthority(destf, fs);
+ String fullF1 = getQualifiedPathWithoutSchemeAndAuthority(srcf, srcFs);
+ String fullF2 = getQualifiedPathWithoutSchemeAndAuthority(destf, destFs);
- boolean isInTest = Boolean.valueOf(HiveConf.getBoolVar(fs.getConf(), ConfVars.HIVE_IN_TEST));
+ boolean isInTest = Boolean.valueOf(HiveConf.getBoolVar(srcFs.getConf(), ConfVars.HIVE_IN_TEST));
// In the automation, the data warehouse is the local file system based.
LOG.debug("The source path is " + fullF1 + " and the destination path is " + fullF2);
if (isInTest) {
@@ -2568,15 +2568,27 @@ private void constructOneLBLocationMap(FileStatus fSta,
//from mv command if the destf is a directory, it replaces the destf instead of moving under
//the destf. in this case, the replaced destf still preserves the original destf's permission
public static boolean moveFile(HiveConf conf, Path srcf, Path destf,
- FileSystem fs, boolean replace, boolean isSrcLocal) throws HiveException {
+ boolean replace, boolean isSrcLocal) throws HiveException {
boolean success = false;
+ FileSystem srcFs, destFs;
+ try {
+ destFs = destf.getFileSystem(conf);
+ } catch (IOException e) {
+ LOG.error(e);
+ throw new HiveException(e.getMessage(), e);
+ }
+ try {
+ srcFs = srcf.getFileSystem(conf);
+ } catch (IOException e) {
+ LOG.error(e);
+ throw new HiveException(e.getMessage(), e);
+ }
//needed for perm inheritance.
boolean inheritPerms = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
HadoopShims shims = ShimLoader.getHadoopShims();
HadoopShims.HdfsFileStatus destStatus = null;
- HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
// If source path is a subdirectory of the destination path:
// ex: INSERT OVERWRITE DIRECTORY 'target/warehouse/dest4.out' SELECT src.value WHERE src.key >= 300;
@@ -2584,11 +2596,11 @@ private void constructOneLBLocationMap(FileStatus fSta,
// (1) Do not delete the dest dir before doing the move operation.
// (2) It is assumed that subdir and dir are in same encryption zone.
// (3) Move individual files from src dir to dest dir.
- boolean destIsSubDir = isSubDir(srcf, destf, fs, isSrcLocal);
+ boolean destIsSubDir = isSubDir(srcf, destf, srcFs, destFs, isSrcLocal);
try {
if (inheritPerms || replace) {
try{
- destStatus = shims.getFullFileStatus(conf, fs, destf.getParent());
+ destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent());
//if destf is an existing directory:
//if replace is true, delete followed by rename(mv) is equivalent to replace
//if replace is false, rename (mv) actually move the src under dest dir
@@ -2596,20 +2608,22 @@ private void constructOneLBLocationMap(FileStatus fSta,
// to delete the file first
if (replace && !destIsSubDir) {
LOG.debug("The path " + destf.toString() + " is deleted");
- fs.delete(destf, true);
+ destFs.delete(destf, true);
}
} catch (FileNotFoundException ignore) {
//if dest dir does not exist, any re
if (inheritPerms) {
- destStatus = shims.getFullFileStatus(conf, fs, destf.getParent());
+ destStatus = shims.getFullFileStatus(conf, destFs, destf.getParent());
}
}
}
- if (!isSrcLocal) {
- // For NOT local src file, rename the file
- if (hdfsEncryptionShim != null && (hdfsEncryptionShim.isPathEncrypted(srcf) || hdfsEncryptionShim.isPathEncrypted(destf))
- && !hdfsEncryptionShim.arePathsOnSameEncryptionZone(srcf, destf))
- {
+ if (isSrcLocal) {
+ // For local src file, copy to hdfs
+ destFs.copyFromLocalFile(srcf, destf);
+ success = true;
+ } else {
+ if (needToCopy(srcf, destf, srcFs, destFs)) {
+ //copy if across file system or encryption zones.
LOG.info("Copying source " + srcf + " to " + destf + " because HDFS encryption zones are different.");
success = FileUtils.copy(srcf.getFileSystem(conf), srcf, destf.getFileSystem(conf), destf,
true, // delete source
@@ -2617,7 +2631,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
conf);
} else {
if (destIsSubDir) {
- FileStatus[] srcs = fs.listStatus(srcf, FileUtils.HIDDEN_FILES_PATH_FILTER);
+ FileStatus[] srcs = destFs.listStatus(srcf, FileUtils.HIDDEN_FILES_PATH_FILTER);
if (srcs.length == 0) {
success = true; // Nothing to move.
}
@@ -2632,13 +2646,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
} else {
- success = fs.rename(srcf, destf);
+ success = destFs.rename(srcf, destf);
}
}
- } else {
- // For local src file, copy to hdfs
- fs.copyFromLocalFile(srcf, destf);
- success = true;
}
LOG.info((replace ? "Replacing src:" : "Renaming src: ") + srcf.toString()
@@ -2649,7 +2659,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
if (success && inheritPerms) {
try {
- ShimLoader.getHadoopShims().setFullFileStatus(conf, destStatus, fs, destf);
+ ShimLoader.getHadoopShims().setFullFileStatus(conf, destStatus, destFs, destf);
} catch (IOException e) {
LOG.warn("Error setting permission of file " + destf + ": "+ e.getMessage(), e);
}
@@ -2658,6 +2668,23 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
/**
+ * If moving across different FileSystems or different encryption zones, need to do a File copy instead of rename.
+ * TODO- consider if need to do this for different file authority.
+ */
+ static protected boolean needToCopy(Path srcf, Path destf, FileSystem srcFs, FileSystem destFs) throws HiveException, IOException {
+ //Check if different FileSystems
+ if (!srcFs.getClass().equals(destFs.getClass())) {
+ return true;
+ }
+
+ //Check if different encryption zones
+ HadoopShims.HdfsFileStatus destStatus = null;
+ HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
+ return hdfsEncryptionShim != null && (hdfsEncryptionShim.isPathEncrypted(srcf) || hdfsEncryptionShim.isPathEncrypted(destf))
+ && !hdfsEncryptionShim.arePathsOnSameEncryptionZone(srcf, destf);
+ }
+
+ /**
* Copy files. This handles building the mapping for buckets and such between the source and
* destination
* @param conf Configuration object
@@ -2711,7 +2738,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
try {
for (List<Path[]> sdpairs : result) {
for (Path[] sdpair : sdpairs) {
- if (!moveFile(conf, sdpair[0], sdpair[1], fs, false, isSrcLocal)) {
+ if (!moveFile(conf, sdpair[0], sdpair[1], false, isSrcLocal)) {
throw new IOException("Cannot move " + sdpair[0] + " to "
+ sdpair[1]);
}
@@ -2892,7 +2919,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
inheritFromTable(tablePath, destParent, conf, destFs);
}
}
- if (!moveFile(conf, sdpair[0], sdpair[1], destFs, true, isSrcLocal)) {
+ if (!moveFile(conf, sdpair[0], sdpair[1], true, isSrcLocal)) {
throw new IOException("Unable to move file/directory from " + sdpair[0] +
" to " + sdpair[1]);
}
@@ -2911,7 +2938,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
// srcs must be a list of files -- ensured by LoadSemanticAnalyzer
for (List<Path[]> sdpairs : result) {
for (Path[] sdpair : sdpairs) {
- if (!moveFile(conf, sdpair[0], sdpair[1], destFs, true,
+ if (!moveFile(conf, sdpair[0], sdpair[1], true,
isSrcLocal)) {
throw new IOException("Error moving: " + sdpair[0] + " into: " + sdpair[1]);
}