You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by yc...@apache.org on 2021/09/13 18:12:00 UTC

[hive] branch master updated: HIVE-24936 - Fix file name parsing and copy file move (#2628) (Harish Jaiprakash, reviewed by Kishen Das)

This is an automated email from the ASF dual-hosted git repository.

ychena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new aefbf40  HIVE-24936 - Fix file name parsing and copy file move (#2628) (Harish Jaiprakash, reviewed by Kishen Das)
aefbf40 is described below

commit aefbf40f0b1b2e07ede073418200f8940f91dba7
Author: Harish Jaiprakash <ha...@users.noreply.github.com>
AuthorDate: Mon Sep 13 23:41:45 2021 +0530

    HIVE-24936 - Fix file name parsing and copy file move (#2628) (Harish Jaiprakash, reviewed by Kishen Das)
    
    * HIVE-24936 - Fix file name parsing and copy file move.
    
    * HIVE-25130: handle spark inserted files during alter table concat.
---
 .../hive/ql/exec/AbstractFileMergeOperator.java    |  36 ++---
 .../hadoop/hive/ql/exec/ParsedOutputFileName.java  | 156 ++++++++++++++++++
 .../org/apache/hadoop/hive/ql/exec/Utilities.java  | 168 ++++++++++---------
 .../hive/ql/exec/ParsedOutputFileNameTest.java     | 177 +++++++++++++++++++++
 .../apache/hadoop/hive/ql/exec/TestUtilities.java  |  45 ++++++
 5 files changed, 473 insertions(+), 109 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index 86cb983..264573b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -275,39 +275,33 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc>
           throw new HiveException("Incompatible files should not happen in MM tables.");
         }
         Path destDir = finalPath.getParent();
-        Path destPath = destDir;
         // move any incompatible files to final path
         if (incompatFileSet != null && !incompatFileSet.isEmpty()) {
           for (Path incompatFile : incompatFileSet) {
-            // check if path conforms to Hive's file name convention. Hive expects filenames to be in specific format
+            // Hive expects filenames to be in specific format
             // like 000000_0, but "LOAD DATA" commands can let you add any files to any partitions/tables without
-            // renaming. This can cause MoveTask to remove files in some cases where MoveTask assumes the files are
-            // are generated by speculatively executed tasks.
+            // renaming.
+            // This can cause a few issues:
+            // MoveTask will remove files in some cases where MoveTask assumes the files are are generated by
+            // speculatively executed tasks.
             // Example: MoveTask thinks the following files are same
             // part-m-00000_1417075294718
             // part-m-00001_1417075294718
             // Assumes 1417075294718 as taskId and retains only large file supposedly generated by speculative execution.
-            // This can result in data loss in case of CONCATENATE/merging. Filter out files that does not match Hive's
-            // filename convention.
-            if (!Utilities.isHiveManagedFile(incompatFile)) {
-              // rename un-managed files to conform to Hive's naming standard
-              // Example:
-              // /warehouse/table/part-m-00000_1417075294718 will get renamed to /warehouse/table/.hive-staging/000000_0
-              // If staging directory already contains the file, taskId_copy_N naming will be used.
-              final String taskId = Utilities.getTaskId(jc);
-              Path destFilePath = new Path(destDir, new Path(taskId));
-              for (int counter = 1; fs.exists(destFilePath); counter++) {
-                destFilePath = new Path(destDir, taskId + (Utilities.COPY_KEYWORD + counter));
-              }
-              LOG.warn("Path doesn't conform to Hive's expectation. Renaming {} to {}", incompatFile, destFilePath);
-              destPath = destFilePath;
-            }
+            // This can result in data loss in case of CONCATENATE/merging.
 
+            // If filename is consistent with XXXXXX_N and another task with same task-id runs after this move, then
+            // the same file name is used in the other task which will result in task failure and retry of task and
+            // subsequent removal of this file as duplicate.
+            // Example: if the file name is 000001_0 and another task runs with taskid 000001_0, it will fail to create
+            // the file and next attempt will create 000001_1, both the files will be considered as output of same task
+            // and only 000001_1 will be picked resulting it loss of existing file 000001_0.
+            final String destFileName = Utilities.getTaskId(jc) + Utilities.COPY_KEYWORD + 1;
             try {
-              Utilities.renameOrMoveFiles(fs, incompatFile, destPath);
+              Path destPath = Utilities.moveFile(fs, incompatFile, destDir, destFileName);
               LOG.info("Moved incompatible file " + incompatFile + " to " + destPath);
             } catch (HiveException e) {
-              LOG.error("Unable to move " + incompatFile + " to " + destPath);
+              LOG.error("Unable to move " + incompatFile + " to " + destDir + ", " + destFileName);
               throw new IOException(e);
             }
           }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ParsedOutputFileName.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ParsedOutputFileName.java
new file mode 100644
index 0000000..4fc827e
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ParsedOutputFileName.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+/**
+ * Helper class to match hive filenames and extract taskId, taskAttemptId, copyIndex.
+ *
+ * Matches following:
+ * 00001_02
+ * 00001_02.gz
+ * 00001_02.zlib.gz
+ * 00001_02_copy_1
+ * 00001_02_copy_1.gz
+ * <p>
+ * All the components are here:
+ * tmp_(taskPrefix)00001_02_copy_1.zlib.gz
+ *
+ * Spark output file:
+ * part-00026-23003837-facb-49ec-b1c4-eeda902cacf3.c000.zlib.orc
+ */
+public class ParsedOutputFileName {
+  private static final Pattern COPY_FILE_NAME_TO_TASK_ID_REGEX = Pattern.compile(
+      "^(.*?)?" + // any prefix
+      "(\\(.*\\))?" + // taskId prefix
+      "([0-9]+)" + // taskId
+      "(?:_([0-9]{1,6}))?" + // _<attemptId> (limited to 6 digits)
+      "(?:_copy_([0-9]{1,6}))?" + // copy file index
+      "(\\..*)?$"); // any suffix/file extension
+
+  private static final Pattern SPARK_FILE_NAME =
+      Pattern.compile("^part-(\\d+)-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\.[a-f0-9]+(.*)$");
+
+  public static ParsedOutputFileName parse(String fileName) {
+    return new ParsedOutputFileName(fileName);
+  }
+
+  private final boolean matches;
+  private final String taskIdPrefix;
+  private final String taskId;
+  private final String attemptId;
+  private final String copyIndex;
+  private final String suffix;
+  private final CharSequence filePrefixForCopy;
+
+  private ParsedOutputFileName(CharSequence fileName) {
+    Matcher m = SPARK_FILE_NAME.matcher(fileName);
+    if (m.matches()) {
+      matches = true;
+      taskIdPrefix = null;
+      taskId = m.group(1);
+      attemptId = "1";
+      copyIndex = null;
+      String s = m.group(2);
+      suffix = (s == null || s.isEmpty() ? null : s);
+      filePrefixForCopy = null;
+    } else {
+      m = COPY_FILE_NAME_TO_TASK_ID_REGEX.matcher(fileName);
+      matches = m.matches();
+      if (matches) {
+        taskIdPrefix = m.group(2);
+        taskId = m.group(3);
+        attemptId = m.group(4);
+        copyIndex = m.group(5);
+        suffix = m.group(6);
+        filePrefixForCopy = m.end(4) >= 0 ? fileName.subSequence(0, m.end(4)) : null;
+      } else {
+        taskIdPrefix = null;
+        taskId = null;
+        attemptId = null;
+        copyIndex = null;
+        suffix = null;
+        filePrefixForCopy = null;
+      }
+    }
+  }
+
+  public boolean matches() {
+    return matches;
+  }
+
+  public String getTaskIdPrefix() {
+    return taskIdPrefix;
+  }
+
+  public String getTaskId() {
+    return taskId;
+  }
+
+  public String getPrefixedTaskId() {
+    String prefix = getTaskIdPrefix();
+    String taskId = getTaskId();
+    if (prefix != null && taskId != null) {
+      return prefix + taskId;
+    } else {
+      return taskId;
+    }
+  }
+
+  public String getAttemptId() {
+    return attemptId;
+  }
+
+  public boolean isCopyFile() {
+    return copyIndex != null;
+  }
+
+  public String getCopyIndex() {
+    return copyIndex;
+  }
+
+  public String getSuffix() {
+    return suffix;
+  }
+
+  /**
+   * Create a copy file using the same file name as this and the given index. It will keep the prefixes but drop any
+   * suffixes.
+   * Ex: 00001_02 will be converted to 00001_02_copy_3 for idx = 3.
+   * tmp_(prefix)00001_02_copy_1.snappy.orc will be converted to tmp_(prefix)00001_02_copy_3 for idx = 3
+   * @param idx The index required.
+   * @return
+   */
+  public String makeFilenameWithCopyIndex(int idx) throws HiveException {
+    if (filePrefixForCopy == null) {
+      throw new HiveException("Not expected to make copy files of spark output files.");
+    }
+    return filePrefixForCopy + "_copy_" + idx;
+  }
+
+  public String toString() {
+    return "[taskId: " + getPrefixedTaskId() + ", taskAttemptId: " + getAttemptId() +
+        ", copyIndex: " + getCopyIndex() + "]";
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index b6e8ed5..01f0967 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1098,7 +1098,7 @@ public final class Utilities {
     }
   }
 
-  private static void moveFile(FileSystem fs, FileStatus file, Path dst) throws IOException,
+  private static void moveFileOrDir(FileSystem fs, FileStatus file, Path dst) throws IOException,
       HiveException {
     Path srcFilePath = file.getPath();
     String fileName = srcFilePath.getName();
@@ -1106,32 +1106,61 @@ public final class Utilities {
     if (file.isDir()) {
       renameOrMoveFiles(fs, srcFilePath, dstFilePath);
     } else {
-      if (fs.exists(dstFilePath)) {
-        int suffix = 0;
-        do {
-          suffix++;
-          dstFilePath = new Path(dst, fileName + "_" + suffix);
-        } while (fs.exists(dstFilePath));
-      }
-
-      if (!fs.rename(srcFilePath, dstFilePath)) {
-        throw new HiveException("Unable to move: " + srcFilePath + " to: " + dst);
-      }
+      moveFile(fs, srcFilePath, dst, fileName);
     }
   }
 
   /**
    * Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an
-   * existing file with the same name, the new file's name will be appended with "_1", "_2", etc.
-   *
+   * existing file with the same name, the new file's name will be generated based on the file name.
+   * If the file name confirms to hive managed file NNNNNN_Y(_copy_YY) then it will create NNNNN_Y_copy_XX
+   * else it will append _1, _2, ....
    * @param fs
    *          the FileSystem where src and dst are on.
-   * @param src
-   *          the src directory
-   * @param dst
+   * @param srcFile
+   *          the src file
+   * @param destDir
    *          the target directory
-   * @throws IOException
+   * @param destFileName
+   *          the target filename
+   * @return The final path the file was moved to.
+   * @throws IOException, HiveException
    */
+  public static Path moveFile(FileSystem fs, Path srcFile, Path destDir, String destFileName)
+      throws IOException, HiveException {
+    Path dstFilePath = new Path(destDir, destFileName);
+    if (fs.exists(dstFilePath)) {
+      ParsedOutputFileName parsedFileName = ParsedOutputFileName.parse(destFileName);
+      int suffix = 0;
+      do {
+        suffix++;
+        if (parsedFileName.matches()) {
+          dstFilePath = new Path(destDir, parsedFileName.makeFilenameWithCopyIndex(suffix));
+        } else {
+          dstFilePath = new Path(destDir, destFileName + "_" + suffix);
+        }
+      } while (fs.exists(dstFilePath));
+    }
+    if (!fs.rename(srcFile, dstFilePath)) {
+      throw new HiveException("Unable to move: " + srcFile + " to: " + dstFilePath);
+    }
+    return dstFilePath;
+  }
+
+    /**
+     * Rename src to dst, or in the case dst already exists, move files in src to dst. If there is an
+     * existing file with the same name, the new file's name will be generated based on the file name.
+     * If the file name confirms to hive managed file NNNNNN_Y(_copy_YY) then it will create NNNNN_Y_copy_XX
+     * else it will append _1, _2, ....
+     *
+     * @param fs
+     *          the FileSystem where src and dst are on.
+     * @param src
+     *          the src directory
+     * @param dst
+     *          the target directory
+     * @throws IOException
+     */
   public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws IOException,
       HiveException {
     if (!fs.exists(dst)) {
@@ -1142,7 +1171,7 @@ public final class Utilities {
       // move file by file
       FileStatus[] files = fs.listStatus(src);
       for (FileStatus file : files) {
-        Utilities.moveFile(fs, file, dst);
+        Utilities.moveFileOrDir(fs, file, dst);
       }
     }
   }
@@ -1180,7 +1209,7 @@ public final class Utilities {
           @Override
           public Void call() throws HiveException {
             try {
-              Utilities.moveFile(fs, file, dst);
+              Utilities.moveFileOrDir(fs, file, dst);
             } catch (Exception e) {
               throw new HiveException(e);
             }
@@ -1194,39 +1223,7 @@ public final class Utilities {
     }
   }
 
-  /**
-   * The first group will contain the task id. The second group is the optional extension. The file
-   * name looks like: "0_0" or "0_0.gz". There may be a leading prefix (tmp_). Since getTaskId() can
-   * return an integer only - this should match a pure integer as well. {1,6} is used to limit
-   * matching for attempts #'s 0-999999.
-   */
-  private static final Pattern FILE_NAME_TO_TASK_ID_REGEX =
-      Pattern.compile("^.*?([0-9]+)(_[0-9]{1,6})?(\\..*)?$");
-
-  /**
-   * Some jobs like "INSERT INTO" jobs create copies of files like 0000001_0_copy_2.
-   * For such files,
-   * Group 1: 00000001 [taskId]
-   * Group 3: 0        [task attempId]
-   * Group 4: _copy_2  [copy suffix]
-   * Group 6: copy     [copy keyword]
-   * Group 8: 2        [copy file index]
-   */
   public static final String COPY_KEYWORD = "_copy_"; // copy keyword
-  private static final Pattern COPY_FILE_NAME_TO_TASK_ID_REGEX =
-      Pattern.compile("^.*?"+ // any prefix
-                      "([0-9]+)"+ // taskId
-                      "(_)"+ // separator
-                      "([0-9]{1,6})?"+ // attemptId (limited to 6 digits)
-                      "((_)(\\Bcopy\\B)(_)" +
-                      "([0-9]{1,6})$)?"+ // copy file index
-                      "(\\..*)?$"); // any suffix/file extension
-
-  /**
-   * This retruns prefix part + taskID for bucket join for partitioned table
-   */
-  private static final Pattern FILE_NAME_PREFIXED_TASK_ID_REGEX =
-      Pattern.compile("^.*?((\\(.*\\))?[0-9]+)(_[0-9]{1,6})?(\\..*)?$");
 
   /**
    * This breaks a prefixed bucket number into the prefix and the taskID
@@ -1247,7 +1244,7 @@ public final class Utilities {
    *          filename to extract taskid from
    */
   public static String getTaskIdFromFilename(String filename) {
-    return getTaskIdFromFilename(filename, FILE_NAME_TO_TASK_ID_REGEX);
+    return getIdFromFilename(filename, false, false);
   }
 
   /**
@@ -1258,33 +1255,38 @@ public final class Utilities {
    *          filename to extract taskid from
    */
   private static String getPrefixedTaskIdFromFilename(String filename) {
-    return getTaskIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX);
-  }
-
-  private static String getTaskIdFromFilename(String filename, Pattern pattern) {
-    return getIdFromFilename(filename, pattern, 1);
+    return getIdFromFilename(filename, true, false);
   }
 
   private static int getAttemptIdFromFilename(String filename) {
-    String attemptStr = getIdFromFilename(filename, FILE_NAME_PREFIXED_TASK_ID_REGEX, 3);
-    return Integer.parseInt(attemptStr.substring(1));
+    return Integer.parseInt(getIdFromFilename(filename, true, true));
   }
 
-  private static String getIdFromFilename(String filename, Pattern pattern, int group) {
-    String taskId = filename;
-    int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
+  private static String getIdFromFilename(String filepath, boolean isPrefixed, boolean isTaskAttempt) {
+    String filename = filepath;
+    int dirEnd = filepath.lastIndexOf(Path.SEPARATOR);
     if (dirEnd != -1) {
-      taskId = filename.substring(dirEnd + 1);
+      filename = filepath.substring(dirEnd + 1);
     }
 
-    Matcher m = pattern.matcher(taskId);
-    if (!m.matches()) {
+    ParsedOutputFileName parsedOutputFileName = ParsedOutputFileName.parse(filename);
+    String taskId;
+    if (parsedOutputFileName.matches()) {
+      if (isTaskAttempt) {
+        taskId = parsedOutputFileName.getAttemptId();
+      } else {
+        taskId = isPrefixed ? parsedOutputFileName.getPrefixedTaskId() : parsedOutputFileName.getTaskId();
+      }
+    } else {
+      taskId = filename;
       LOG.warn("Unable to get task id from file name: {}. Using last component {}"
-          + " as task id.", filename, taskId);
+          + " as task id.", filepath, taskId);
+    }
+    if (isTaskAttempt) {
+      LOG.debug("TaskAttemptId for {} = {}", filepath, taskId);
     } else {
-      taskId = m.group(group);
+      LOG.debug("TaskId for {} = {}", filepath, taskId);
     }
-    LOG.debug("TaskId for {} = {}", filename, taskId);
     return taskId;
   }
 
@@ -1570,7 +1572,7 @@ public final class Utilities {
           public Void call() throws HiveException {
             try {
               LOG.debug("Moving from {} to {} ", fileStatus.getPath(), dst);
-              Utilities.moveFile(fs, fileStatus, dst);
+              Utilities.moveFileOrDir(fs, fileStatus, dst);
             } catch (Exception e) {
               throw new HiveException(e);
             }
@@ -2029,27 +2031,17 @@ public final class Utilities {
     return toRetain;
   }
 
-  public static boolean isCopyFile(String filename) {
-    String taskId = filename;
-    String copyFileSuffix = null;
-    int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
+  public static boolean isCopyFile(String filepath) {
+    String filename = filepath;
+    int dirEnd = filepath.lastIndexOf(Path.SEPARATOR);
     if (dirEnd != -1) {
-      taskId = filename.substring(dirEnd + 1);
-    }
-    Matcher m = COPY_FILE_NAME_TO_TASK_ID_REGEX.matcher(taskId);
-    if (!m.matches()) {
-      LOG.warn("Unable to verify if file name {} has _copy_ suffix.", filename);
-    } else {
-      taskId = m.group(1);
-      copyFileSuffix = m.group(4);
+      filename = filepath.substring(dirEnd + 1);
     }
-
-    LOG.debug("Filename: {} TaskId: {} CopySuffix: {}", filename, taskId, copyFileSuffix);
-    if (taskId != null && copyFileSuffix != null) {
-      return true;
+    ParsedOutputFileName parsedFileName = ParsedOutputFileName.parse(filename);
+    if (!parsedFileName.matches()) {
+      LOG.warn("Unable to verify if file name {} has _copy_ suffix.", filepath);
     }
-
-    return false;
+    return parsedFileName.isCopyFile();
   }
 
   public static String getBucketFileNameFromPathSubString(String bucketName) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/ParsedOutputFileNameTest.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/ParsedOutputFileNameTest.java
new file mode 100644
index 0000000..50c3615
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/ParsedOutputFileNameTest.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ParsedOutputFileNameTest {
+  @Test
+  public void testStandardNoAttemptId() {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("00001");
+    Assert.assertTrue(p.matches());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("00001", p.getPrefixedTaskId());
+    Assert.assertNull(p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+  }
+
+  @Test
+  public void testStandard() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("00001_02");
+    Assert.assertTrue(p.matches());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+    Assert.assertEquals("00001_02_copy_3", p.makeFilenameWithCopyIndex(3));
+  }
+
+  @Test
+  public void testStandardPrefix() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("(prefix)00001_02");
+    Assert.assertTrue(p.matches());
+    Assert.assertEquals("(prefix)", p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("(prefix)00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+    Assert.assertEquals("(prefix)00001_02_copy_3", p.makeFilenameWithCopyIndex(3));
+  }
+
+  @Test
+  public void testStandardSuffix() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("00001_02.snappy.orc");
+    Assert.assertTrue(p.matches());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertEquals(".snappy.orc", p.getSuffix());
+    Assert.assertEquals("00001_02_copy_3", p.makeFilenameWithCopyIndex(3));
+  }
+
+  @Test
+  public void testPrefixAndSuffix() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("tmp_(prefix)00001_02.snappy.orc");
+    Assert.assertTrue(p.matches());
+    Assert.assertEquals("(prefix)", p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("(prefix)00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertEquals(".snappy.orc", p.getSuffix());
+    Assert.assertEquals("tmp_(prefix)00001_02_copy_3", p.makeFilenameWithCopyIndex(3));
+  }
+
+  @Test
+  public void testCopy() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("00001_02_copy_3");
+    Assert.assertTrue(p.matches());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertEquals("3", p.getCopyIndex());
+    Assert.assertTrue(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+    Assert.assertEquals("00001_02_copy_4", p.makeFilenameWithCopyIndex(4));
+  }
+
+  @Test
+  public void testCopyAllParts() throws Exception {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("tmp_(prefix)00001_02_copy_3.snappy.orc");
+    Assert.assertTrue(p.matches());
+    Assert.assertEquals("(prefix)", p.getTaskIdPrefix());
+    Assert.assertEquals("00001", p.getTaskId());
+    Assert.assertEquals("(prefix)00001", p.getPrefixedTaskId());
+    Assert.assertEquals("02", p.getAttemptId());
+    Assert.assertEquals("3", p.getCopyIndex());
+    Assert.assertTrue(p.isCopyFile());
+    Assert.assertEquals(".snappy.orc", p.getSuffix());
+    Assert.assertEquals("tmp_(prefix)00001_02_copy_4", p.makeFilenameWithCopyIndex(4));
+  }
+
+  @Test
+  public void testSparkFileNameSuffixed() {
+    ParsedOutputFileName p = ParsedOutputFileName.parse(
+        "part-00026-23003837-facb-49ec-b1c4-eeda902cacf3.c000.zlib.orc");
+    Assert.assertTrue(p.matches());
+    Assert.assertEquals("00026", p.getTaskId());
+    Assert.assertEquals("00026", p.getPrefixedTaskId());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("1", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertEquals(".zlib.orc", p.getSuffix());
+    try {
+      p.makeFilenameWithCopyIndex(1);
+      Assert.fail("Expected HiveException");
+    } catch(HiveException e) {
+    }
+  }
+
+  @Test
+  public void testSparkFileName() {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("part-00003-c6acfdee-0c32-492e-b209-c2f1cf477770.c000");
+    Assert.assertTrue(p.matches());
+    Assert.assertEquals("00003", p.getTaskId());
+    Assert.assertEquals("00003", p.getPrefixedTaskId());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertEquals("1", p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+    try {
+      p.makeFilenameWithCopyIndex(1);
+      Assert.fail("Expected HiveException");
+    } catch(HiveException e) {
+    }
+  }
+
+  @Test
+  public void testNoMatch() {
+    ParsedOutputFileName p = ParsedOutputFileName.parse("ZfsLke");
+    Assert.assertFalse(p.matches());
+    Assert.assertNull(p.getTaskId());
+    Assert.assertNull(p.getPrefixedTaskId());
+    Assert.assertNull(p.getTaskIdPrefix());
+    Assert.assertNull(p.getAttemptId());
+    Assert.assertNull(p.getCopyIndex());
+    Assert.assertFalse(p.isCopyFile());
+    Assert.assertNull(p.getSuffix());
+    try {
+      p.makeFilenameWithCopyIndex(1);
+      Assert.fail("Expected HiveException");
+    } catch(HiveException e) {
+    }
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
index c46e83b..2cdebe5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
@@ -201,6 +201,51 @@ public class TestUtilities {
     assertEquals(NUM_BUCKETS, paths.size());
   }
 
+  @Test
+  public void testRenameFilesNotExists() throws Exception {
+    FileSystem fs = mock(FileSystem.class);
+    Path src = new Path("src");
+    Path dest = new Path("dir");
+    when(fs.exists(dest)).thenReturn(false);
+    when(fs.rename(src, dest)).thenReturn(true);
+    Utilities.renameOrMoveFiles(fs, src, dest);
+    verify(fs, times(1)).rename(src, dest);
+  }
+
+  @Test
+  public void testRenameFileExistsNonHive() throws Exception {
+    FileSystem fs = mock(FileSystem.class);
+    Path src = new Path("src");
+    Path dest = new Path("dir1");
+    Path finalPath = new Path(dest, "src_2");
+    FileStatus status = new FileStatus();
+    status.setPath(src);
+    when(fs.listStatus(src)).thenReturn(new FileStatus[]{status});
+    when(fs.exists(dest)).thenReturn(true);
+    when(fs.exists(new Path(dest, "src"))).thenReturn(true);
+    when(fs.exists(new Path(dest,"src_1"))).thenReturn(true);
+    when(fs.rename(src, finalPath)).thenReturn(true);
+    Utilities.renameOrMoveFiles(fs, src, dest);
+    verify(fs, times(1)).rename(src, finalPath);
+  }
+
+  @Test
+  public void testRenameFileExistsHivePath() throws Exception {
+    FileSystem fs = mock(FileSystem.class);
+    Path src = new Path("00001_02");
+    Path dest = new Path("dir1");
+    Path finalPath = new Path(dest, "00001_02_copy_2");
+    FileStatus status = new FileStatus();
+    status.setPath(src);
+    when(fs.listStatus(src)).thenReturn(new FileStatus[]{status});
+    when(fs.exists(dest)).thenReturn(true);
+    when(fs.exists(new Path(dest, "00001_02"))).thenReturn(true);
+    when(fs.exists(new Path(dest,"00001_02_copy_1"))).thenReturn(true);
+    when(fs.rename(src, finalPath)).thenReturn(true);
+    Utilities.renameOrMoveFiles(fs, src, dest);
+    verify(fs, times(1)).rename(src, finalPath);
+  }
+
   private List<Path> runRemoveTempOrDuplicateFilesTestCase(String executionEngine, boolean dPEnabled)
       throws Exception {
     Configuration hconf = new HiveConf(this.getClass());