Posted to commits@hive.apache.org by na...@apache.org on 2012/12/07 12:52:41 UTC

svn commit: r1418286 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/

Author: namit
Date: Fri Dec  7 11:52:36 2012
New Revision: 1418286

URL: http://svn.apache.org/viewvc?rev=1418286&view=rev
Log:
HIVE-3767 BucketizedHiveInputFormat should be automatically used with 
Bucketized Map Joins also (Gang Tim Liu via namit)
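
For context, the control flow this patch introduces can be sketched in a few lines of plain Java. The sketch below uses simplified stand-in classes (JoinDescSketch and PlanSketch are not Hive classes); only the setBucketMapJoin/isBucketMapJoin accessors and the setUseBucketizedHiveInputFormat call mirror the real MapJoinDesc and plan methods touched in the diffs below.

// Simplified, self-contained sketch of the flag flow added by HIVE-3767.
// These classes are stand-ins, NOT Hive's real classes; only the accessor
// names shown in the diffs below match the actual MapJoinDesc/plan API.
class JoinDescSketch {
  private boolean isBucketMapJoin;                 // new flag added by this patch

  public boolean isBucketMapJoin() { return isBucketMapJoin; }
  public void setBucketMapJoin(boolean isBucketMapJoin) {
    this.isBucketMapJoin = isBucketMapJoin;
  }
}

class PlanSketch {
  private boolean useBucketizedHiveInputFormat;    // stand-in for the plan's flag

  public void setUseBucketizedHiveInputFormat(boolean b) {
    useBucketizedHiveInputFormat = b;
  }
  public boolean usesBucketizedHiveInputFormat() { return useBucketizedHiveInputFormat; }
}

public class BucketMapJoinFlagFlow {
  public static void main(String[] args) {
    JoinDescSketch desc = new JoinDescSketch();
    // 1. BucketMapJoinOptimizer: the join was successfully converted to a
    //    bucket map join, so record that on the join descriptor.
    desc.setBucketMapJoin(true);

    // 2. GenMapRedUtils: switch the plan to BucketizedHiveInputFormat for
    //    either a sort merge join or a bucket map join.
    PlanSketch plan = new PlanSketch();
    boolean isSortMergeJoin = false;               // would be: op instanceof SMBMapJoinOperator
    if (isSortMergeJoin || desc.isBucketMapJoin()) {
      plan.setUseBucketizedHiveInputFormat(true);
    }
    System.out.println("bucketized input format: " + plan.usesBucketizedHiveInputFormat());
  }
}

Because the plan now sets the input format itself, the test queries below no longer need to set hive.input.format explicitly.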



Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q
    hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java Fri Dec  7 11:52:36 2012
@@ -40,10 +40,10 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -351,6 +351,8 @@ public class BucketMapJoinOptimizer impl
       if (bigTablePartitioned) {
         desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
       }
+      // successfully convert to bucket map join
+      desc.setBucketMapJoin(true);
 
       return true;
     }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Dec  7 11:52:36 2012
@@ -288,7 +288,11 @@ public final class GenMapRedUtils {
         bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
         bucketMJCxt.setBigTablePartSpecToFileMapping(
           currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
-        plan.setUseBucketizedHiveInputFormat(currMapJoinOp instanceof SMBMapJoinOperator);
+        // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join
+        if ((currMapJoinOp instanceof SMBMapJoinOperator)
+            || (currMapJoinOp.getConf().isBucketMapJoin())) {
+          plan.setUseBucketizedHiveInputFormat(true);
+        }
       }
     }
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Fri Dec  7 11:52:36 2012
@@ -54,6 +54,9 @@ public class MapJoinDesc extends JoinDes
   //map join dump file name
   private String dumpFilePrefix;
 
+  // flag for bucket map join. One usage is to set BucketizedHiveInputFormat
+  private boolean isBucketMapJoin;
+
   public MapJoinDesc() {
     bigTableBucketNumMapping = new LinkedHashMap<String, Integer>();
   }
@@ -233,4 +236,12 @@ public class MapJoinDesc extends JoinDes
   public void setBigTablePartSpecToFileMapping(Map<String, List<String>> partToFileMapping) {
     this.bigTablePartSpecToFileMapping = partToFileMapping;
   }
+
+  public boolean isBucketMapJoin() {
+    return isBucketMapJoin;
+  }
+
+  public void setBucketMapJoin(boolean isBucketMapJoin) {
+    this.isBucketMapJoin = isBucketMapJoin;
+  }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q Fri Dec  7 11:52:36 2012
@@ -14,7 +14,6 @@ load data local inpath '../data/files/So
 
 set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
-set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 
 -- The tables are bucketed in same columns in different order,
 -- but sorted in different column orders

Modified: hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q Fri Dec  7 11:52:36 2012
@@ -14,7 +14,6 @@ load data local inpath '../data/files/So
 
 set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
-set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 
 -- The tables are bucketed in same columns in different order,
 -- but sorted in different column orders

Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q Fri Dec  7 11:52:36 2012
@@ -22,7 +22,6 @@ CLUSTERED BY (key) INTO 2 BUCKETS;
 INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
 SELECT * FROM src;
 
-set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
 set hive.optimize.bucketmapjoin=true;
 
 -- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value'

Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q Fri Dec  7 11:52:36 2012
@@ -15,8 +15,6 @@ insert overwrite table tmp2 select * fro
 set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
 set hive.merge.mapfiles=false;
-set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-
 create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets;