You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/10/15 06:23:50 UTC
svn commit: r1022817 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
ql/src/test/queries/clientpositive/input43.q
ql/src/test/results/clientpositive/input43.q.out
Author: namit
Date: Fri Oct 15 04:23:50 2010
New Revision: 1022817
URL: http://svn.apache.org/viewvc?rev=1022817&view=rev
Log:
HIVE-307 "LOAD DATA LOCAL INPATH" fails when the table already contains a file of
the same name (Kirk True via namit)
Added:
hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1022817&r1=1022816&r2=1022817&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Oct 15 04:23:50 2010
@@ -363,6 +363,9 @@ Trunk - Unreleased
HIVE-1713 Duplicated MapRedTask in Multi-table inserts mixed with FileSinkOperator
and ReduceSinkOperator (Ning Zhang via namit)
+ HIVE-307 "LOAD DATA LOCAL INPATH" fails when the table already contains a file of
+ the same name (Kirk True via namit)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1022817&r1=1022816&r2=1022817&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Fri Oct 15 04:23:50 2010
@@ -1342,22 +1342,52 @@ public class Hive {
for (FileStatus src : srcs) {
FileStatus[] items = fs.listStatus(src.getPath());
for (FileStatus item : items) {
+ Path itemStaging = item.getPath();
if (Utilities.isTempPath(item)) {
// This check is redundant because temp files are removed by
// execution layer before
// calling loadTable/Partition. But leaving it in just in case.
- fs.delete(item.getPath(), true);
+ fs.delete(itemStaging, true);
continue;
}
if (item.isDir()) {
throw new HiveException("checkPaths: " + src.getPath()
- + " has nested directory" + item.getPath());
+ + " has nested directory" + itemStaging);
}
- Path tmpDest = new Path(destf, item.getPath().getName());
- if (!replace && fs.exists(tmpDest)) {
- throw new HiveException("checkPaths: " + tmpDest
- + " already exists");
+ if (!replace) {
+ // It's possible that the file we're copying may have the same
+ // relative name as an existing file in the "destf" directory.
+ // So let's make a quick check to see if we can rename any
+ // potential offenders so as to allow them to move into the
+ // "destf" directory. The scheme is dead simple: simply tack
+ // on "_copy_N" where N starts at 1 and works its way up until
+ // we find a free space.
+
+ // Note: there are race conditions here, but I don't believe
+ // they're worse than what was already present.
+ int counter = 1;
+ Path itemDest = new Path(destf, itemStaging.getName());
+
+ while (fs.exists(itemDest)) {
+ Path proposedStaging = itemStaging.suffix("_copy_" + counter++);
+ Path proposedDest = new Path(destf, proposedStaging.getName());
+
+ if (fs.exists(proposedDest)) {
+ // There's already a file in our destination directory with our
+ // _copy_N suffix. We've been here before...
+ LOG.trace(proposedDest + " already exists");
+ continue;
+ }
+
+ if (!fs.rename(itemStaging, proposedStaging)) {
+ LOG.debug("Unsuccessfully in attempt to rename " + itemStaging + " to " + proposedStaging + "...");
+ continue;
+ }
+
+ LOG.debug("Successfully renamed " + itemStaging + " to " + proposedStaging);
+ itemDest = proposedDest;
+ }
}
}
}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q?rev=1022817&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/input43.q Fri Oct 15 04:23:50 2010
@@ -0,0 +1,7 @@
+drop table tst_src1;
+create table tst_src1 like src1;
+load data local inpath '../data/files/kv1.txt' into table tst_src1 ;
+select count(1) from tst_src1;
+load data local inpath '../data/files/kv1.txt' into table tst_src1 ;
+select count(1) from tst_src1;
+drop table tst_src1;
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out?rev=1022817&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input43.q.out Fri Oct 15 04:23:50 2010
@@ -0,0 +1,45 @@
+PREHOOK: query: drop table tst_src1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table tst_src1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tst_src1 like src1
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tst_src1 like src1
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: select count(1) from tst_src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-37_796_1122842389749817180/-mr-10000
+POSTHOOK: query: select count(1) from tst_src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-37_796_1122842389749817180/-mr-10000
+500
+PREHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+PREHOOK: type: LOAD
+POSTHOOK: query: load data local inpath '../data/files/kv1.txt' into table tst_src1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@tst_src1
+PREHOOK: query: select count(1) from tst_src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-40_764_2860500961795681259/-mr-10000
+POSTHOOK: query: select count(1) from tst_src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: file:/tmp/kirk/hive_2010-10-13_23-38-40_764_2860500961795681259/-mr-10000
+1000
+PREHOOK: query: drop table tst_src1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tst_src1
+PREHOOK: Output: default@tst_src1
+POSTHOOK: query: drop table tst_src1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tst_src1
+POSTHOOK: Output: default@tst_src1