You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@hive.apache.org by na...@apache.org on 2012/12/07 12:52:41 UTC
svn commit: r1418286 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/
java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/
Author: namit
Date: Fri Dec 7 11:52:36 2012
New Revision: 1418286
URL: http://svn.apache.org/viewvc?rev=1418286&view=rev
Log:
HIVE-3767 BucketizedHiveInputFormat should be automatically used with
Bucketized Map Joins also (Gang Tim Liu via namit)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q
hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q
hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q
hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java Fri Dec 7 11:52:36 2012
@@ -40,10 +40,10 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -351,6 +351,8 @@ public class BucketMapJoinOptimizer impl
if (bigTablePartitioned) {
desc.setBigTablePartSpecToFileMapping(convert(bigTblPartsToBucketFileNames));
}
+ // successfully convert to bucket map join
+ desc.setBucketMapJoin(true);
return true;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Dec 7 11:52:36 2012
@@ -288,7 +288,11 @@ public final class GenMapRedUtils {
bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
bucketMJCxt.setBigTablePartSpecToFileMapping(
currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
- plan.setUseBucketizedHiveInputFormat(currMapJoinOp instanceof SMBMapJoinOperator);
+ // BucketizedHiveInputFormat should be used for either sort merge join or bucket map join
+ if ((currMapJoinOp instanceof SMBMapJoinOperator)
+ || (currMapJoinOp.getConf().isBucketMapJoin())) {
+ plan.setUseBucketizedHiveInputFormat(true);
+ }
}
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Fri Dec 7 11:52:36 2012
@@ -54,6 +54,9 @@ public class MapJoinDesc extends JoinDes
//map join dump file name
private String dumpFilePrefix;
+ // flag for bucket map join. One usage is to set BucketizedHiveInputFormat
+ private boolean isBucketMapJoin;
+
public MapJoinDesc() {
bigTableBucketNumMapping = new LinkedHashMap<String, Integer>();
}
@@ -233,4 +236,12 @@ public class MapJoinDesc extends JoinDes
public void setBigTablePartSpecToFileMapping(Map<String, List<String>> partToFileMapping) {
this.bigTablePartSpecToFileMapping = partToFileMapping;
}
+
+ public boolean isBucketMapJoin() {
+ return isBucketMapJoin;
+ }
+
+ public void setBucketMapJoin(boolean isBucketMapJoin) {
+ this.isBucketMapJoin = isBucketMapJoin;
+ }
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_1.q Fri Dec 7 11:52:36 2012
@@ -14,7 +14,6 @@ load data local inpath '../data/files/So
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-- The tables are bucketed in same columns in different order,
-- but sorted in different column orders
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucket_map_join_2.q Fri Dec 7 11:52:36 2012
@@ -14,7 +14,6 @@ load data local inpath '../data/files/So
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-- The tables are bucketed in same columns in different order,
-- but sorted in different column orders
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin13.q Fri Dec 7 11:52:36 2012
@@ -22,7 +22,6 @@ CLUSTERED BY (key) INTO 2 BUCKETS;
INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
SELECT * FROM src;
-set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin=true;
-- part=1 partition for srcbucket_mapjoin_part_1 is bucketed by 'value'
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q?rev=1418286&r1=1418285&r2=1418286&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin6.q Fri Dec 7 11:52:36 2012
@@ -15,8 +15,6 @@ insert overwrite table tmp2 select * fro
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.merge.mapfiles=false;
-set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-
create table tmp3 (a string, b string, c string) clustered by (a) sorted by (a) into 10 buckets;