You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2015/09/28 16:59:42 UTC
hive git commit: HIVE-11940: "INSERT OVERWRITE" query is very slow
because it creates one "distcp" per file to copy data from staging directory
to target directory (Sergio Pena, reviewed by Ferdinand Xu)
Repository: hive
Updated Branches:
refs/heads/master abe622be4 -> ba21806b7
HIVE-11940: "INSERT OVERWRITE" query is very slow because it creates one "distcp" per file to copy data from staging directory to target directory (Sergio Pena, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ba21806b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ba21806b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ba21806b
Branch: refs/heads/master
Commit: ba21806b77287e237e1aa68fa169d2a81e07346d
Parents: abe622b
Author: Sergio Pena <se...@cloudera.com>
Authored: Mon Sep 28 09:58:08 2015 -0500
Committer: Sergio Pena <se...@cloudera.com>
Committed: Mon Sep 28 09:58:08 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/metadata/Hive.java | 25 ++++++++++++++++----
1 file changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ba21806b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 99896c6..10cafb6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2686,16 +2686,31 @@ private void constructOneLBLocationMap(FileStatus fSta,
if (srcs.length == 0) {
success = true; // Nothing to move.
}
+
+ /* Move files one by one because source is a subdirectory of destination */
for (FileStatus status : srcs) {
- success = FileUtils.copy(srcf.getFileSystem(conf), status.getPath(), destf.getFileSystem(conf), destf,
- true, // delete source
- replace, // overwrite destination
- conf);
+ Path destFile;
- if (!success) {
+ /* Append the source filename to the destination directory */
+ if (destFs.isDirectory(destf)) {
+ destFile = new Path(destf, status.getPath().getName());
+ } else {
+ destFile = destf;
+ }
+
+ // Destination should be replaced, so we delete it first
+ if (destFs.exists(destFile)) {
+ if (!destFs.delete(destFile, true)) {
+ throw new HiveException(String.format("File to replace could not be deleted: %s", destFile));
+ }
+ }
+
+ if (!(destFs.rename(status.getPath(), destFile))) {
throw new HiveException("Unable to move source " + status.getPath() + " to destination " + destf);
}
}
+
+ success = true;
} else {
success = destFs.rename(srcf, destf);
}