You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/10/15 06:23:50 UTC

svn commit: r1022817 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/test/queries/clientpositive/input43.q ql/src/test/results/clientpositive/input43.q.out

Author: namit
Date: Fri Oct 15 04:23:50 2010
New Revision: 1022817

URL: http://svn.apache.org/viewvc?rev=1022817&view=rev
Log:
HIVE-307 "LOAD DATA LOCAL INPATH" fails when the table already contains a file of
the same name (Kirk True via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1022817&r1=1022816&r2=1022817&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Oct 15 04:23:50 2010
@@ -363,6 +363,9 @@ Trunk -  Unreleased
     HIVE-1713 Duplicated MapRedTask in Multi-table inserts mixed with FileSinkOperator
     and ReduceSinkOperator (Ning Zhang via namit)
 
+    HIVE-307 "LOAD DATA LOCAL INPATH" fails when the table already contains a file of
+    the same name (Kirk True via namit)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1022817&r1=1022816&r2=1022817&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Fri Oct 15 04:23:50 2010
@@ -1342,22 +1342,52 @@ public class Hive {
       for (FileStatus src : srcs) {
         FileStatus[] items = fs.listStatus(src.getPath());
         for (FileStatus item : items) {
+          Path itemStaging = item.getPath();
 
           if (Utilities.isTempPath(item)) {
             // This check is redundant because temp files are removed by
             // execution layer before
             // calling loadTable/Partition. But leaving it in just in case.
-            fs.delete(item.getPath(), true);
+            fs.delete(itemStaging, true);
             continue;
           }
           if (item.isDir()) {
             throw new HiveException("checkPaths: " + src.getPath()
-                + " has nested directory" + item.getPath());
+                + " has nested directory" + itemStaging);
           }
-          Path tmpDest = new Path(destf, item.getPath().getName());
-          if (!replace && fs.exists(tmpDest)) {
-            throw new HiveException("checkPaths: " + tmpDest
-                + " already exists");
+          if (!replace) {
+            // It's possible that the file we're copying has the same
+            // relative name as an existing file in the "destf" directory.
+            // If so, rename the incoming (staging) file, not the existing
+            // one, so that it can still be moved into "destf". The scheme
+            // is simple: tack "_copy_N" onto the staging file's name, where
+            // N starts at 1 and counts up until we find a name that is not
+            // already taken in "destf".
+
+            // Note: there are race conditions here, but I don't believe
+            // they're worse than what was already present.
+            int counter = 1;
+            Path itemDest = new Path(destf, itemStaging.getName());
+
+            while (fs.exists(itemDest)) {
+              Path proposedStaging = itemStaging.suffix("_copy_" + counter++);
+              Path proposedDest = new Path(destf, proposedStaging.getName());
+
+              if (fs.exists(proposedDest)) {
+                // There's already a file in our destination directory with our
+                // _copy_N suffix. We've been here before...
+                LOG.trace(proposedDest + " already exists");
+                continue;
+              }
+
+              if (!fs.rename(itemStaging, proposedStaging)) {
+                LOG.debug("Unsuccessfully in attempt to rename " + itemStaging + " to " + proposedStaging + "...");
+                continue;
+              }
+
+              LOG.debug("Successfully renamed " + itemStaging + " to " + proposedStaging);
+              itemDest = proposedDest;
+            }
           }
         }
       }

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q?rev=1022817&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q Fri Oct 15 04:23:50 2010
@@ -0,0 +1,7 @@
+drop table tst_src1;
+create table tst_src1 like src1;
+load data local inpath '../data/files/kv1.txt' into table tst_src1 ;
+select count(1) from tst_src1;
+load data local inpath '../data/files/kv1.txt' into table tst_src1 ;
+select count(1) from tst_src1;
+drop table tst_src1;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out?rev=1022817&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out Fri Oct 15 04:23:50 2010
@@ -0,0 +1,45 @@
+PREHOOK: query: drop table tst_src1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table tst_src1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tst_src1 like src1
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tst_src1 like src1
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: select count(1) from tst_src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-37_796_1122842389749817180/-mr-10000
+POSTHOOK: query: select count(1) from tst_src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-37_796_1122842389749817180/-mr-10000
+500
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: select count(1) from tst_src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-40_764_2860500961795681259/-mr-10000
+POSTHOOK: query: select count(1) from tst_src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-40_764_2860500961795681259/-mr-10000
+1000
+PREHOOK: query: drop table tst_src1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: default@tst_src1
+POSTHOOK: query: drop table tst_src1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: default@tst_src1