You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/01/13 01:11:37 UTC

svn commit: r898581 [1/2] - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java ql/src/test/queries/clientpositive/multi_insert.q ql/src/test/results/clientpositive/multi_insert.q.out

Author: namit
Date: Wed Jan 13 00:11:37 2010
New Revision: 898581

URL: http://svn.apache.org/viewvc?rev=898581&view=rev
Log:
HIVE-1039 bug in multi table/directory inserts
(Ning Zhang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/multi_insert.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/multi_insert.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=898581&r1=898580&r2=898581&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jan 13 00:11:37 2010
@@ -446,6 +446,9 @@
     HIVE-1042 incorrect error for function within transform
     (Paul Yang via namit)
 
+    HIVE-1039 bug in multi table/directory inserts
+    (Ning Zhang via namit)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java?rev=898581&r1=898580&r2=898581&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java Wed Jan 13 00:11:37 2010
@@ -102,7 +102,8 @@
           // or for a map-reduce job
           if ((parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES) &&
               (((mapredWork)currTask.getWork()).getReducer() == null)) ||
-              parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES))
+              (parseCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES) &&
+              (((mapredWork)currTask.getWork()).getReducer() != null)))
             chDir = true;
         }
       }
@@ -290,7 +291,10 @@
           seenOps.add(currTopOp);
           GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, (mapredWork) mapTask.getWork(), false, ctx);
         }
-        if (ret)
+        // TODO: merge currTask with mapTask in GenMRUnion1 so that we will
+        // always see a single mapTask (which is equal to currTask). After doing
+        // this, the block should be removed.
+        if ( ret && mapTask != currTask )
           currTask.removeDependentTask(mvTask);
       }
 

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/multi_insert.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/multi_insert.q?rev=898581&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/multi_insert.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/multi_insert.q Wed Jan 13 00:11:37 2010
@@ -0,0 +1,265 @@
+drop table src_multi1;
+drop table src_multi2;
+create table src_multi1 like src;
+create table src_multi2 like src;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from src
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+from src
+insert overwrite table src_multi1 select * where key < 10 group by key, value
+insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+explain
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+explain
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+from (select * from src  union all select * from src) s
+insert overwrite table src_multi1 select * where key < 10
+insert overwrite table src_multi2 select * where key > 10 and key < 20;
+
+select * from src_multi1 order by key, value;
+select * from src_multi2 order by key, value;
+
+
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+
+!rm -fr /tmp/hive_test/multiins_local;
+
+explain
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+!ls /tmp/hive_test/multiins_local;
+!rm -fr /tmp/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=false;
+
+explain
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+!ls /tmp/hive_test/multiins_local;
+!rm -fr /tmp/hive_test/multiins_local;
+
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=true;
+
+
+explain
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+!ls /tmp/hive_test/multiins_local;
+!rm -fr /tmp/hive_test/multiins_local;
+
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+from src 
+insert overwrite local directory '/tmp/hive_test/multiins_local/0' select * where key = 0
+insert overwrite local directory '/tmp/hive_test/multiins_local/2' select * where key = 2
+insert overwrite local directory '/tmp/hive_test/multiins_local/4' select * where key = 4;
+
+!ls /tmp/hive_test/multiins_local;
+!rm -fr /tmp/hive_test/multiins_local;