Posted to commits@hive.apache.org by na...@apache.org on 2012/09/15 17:26:44 UTC

svn commit: r1385084 [1/3] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/metadata/ java/org/apache/hadoop/hive/ql/parse/ test/queries/clientnegative/ test/queries/clientpositive/ test/results/clientnegative/ te...

Author: namit
Date: Sat Sep 15 15:26:43 2012
New Revision: 1385084

URL: http://svn.apache.org/viewvc?rev=1385084&view=rev
Log:
HIVE-3283 bucket information should be used from the partition instead of the table
(Kevin Wilfong via namit)
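
In effect, the bucket count becomes per-partition metadata: each partition records the number of buckets (and bucketing columns) in force when it was written, and readers consult the partition rather than the table. A minimal HiveQL sketch of the DDL this unblocks, reusing the tst1 table from the new test below:

    create table tst1 (key string, value string)
      partitioned by (ds string) clustered by (key) into 10 buckets;
    insert overwrite table tst1 partition (ds='1') select key, value from src;
    -- Before this commit the next statement failed on a non-empty partitioned
    -- table with error 10130 (NUM_BUCKETS_CHANGE_NOT_ALLOWED); it now succeeds.
    alter table tst1 clustered by (key) into 12 buckets;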


Added:
    hive/trunk/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin10.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin11.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin12.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin8.q
    hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin9.q
    hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q
    hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q
    hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q
    hive/trunk/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin10.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin11.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin12.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin8.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin9.q.out
    hive/trunk/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out
    hive/trunk/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out
    hive/trunk/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out
Removed:
    hive/trunk/ql/src/test/queries/clientnegative/alter_numbuckets_partitioned_table.q
    hive/trunk/ql/src/test/results/clientnegative/alter_numbuckets_partitioned_table.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java?rev=1385084&r1=1385083&r2=1385084&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java Sat Sep 15 15:26:43 2012
@@ -216,9 +216,6 @@ public enum ErrorMsg {
   UDAF_INVALID_LOCATION(10128, "Not yet supported place for UDAF"),
   DROP_PARTITION_NON_STRING_PARTCOLS_NONEQUALITY(10129,
     "Drop partitions for a non string partition columns is not allowed using non-equality"),
-  NUM_BUCKETS_CHANGE_NOT_ALLOWED(10130, "Changing the number of buckets for a " +
-    "partitioned table is not allowed. It may lead to wrong results for " +
-    "older partitions"),
   ALTER_COMMAND_FOR_VIEWS(10131, "To alter a view you need to use the ALTER VIEW command."),
   ALTER_COMMAND_FOR_TABLES(10132, "To alter a base table you need to use the ALTER TABLE command."),
   ALTER_VIEW_DISALLOWED_OP(10133, "Cannot use this form of ALTER on a view"),

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1385084&r1=1385083&r2=1385084&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Sat Sep 15 15:26:43 2012
@@ -344,12 +344,8 @@ public class Partition implements Serial
     return outputFormatClass;
   }
 
-  /**
-   * The number of buckets is a property of the partition. However - internally
-   * we are just storing it as a property of the table as a short term measure.
-   */
   public int getBucketCount() {
-    return table.getNumBuckets();
+    return tPartition.getSd().getNumBuckets();
     /*
      * TODO: Keeping this code around for later use when we will support
      * sampling on tables which are not created with CLUSTERED INTO clause

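Partition.getBucketCount() feeds sampling and bucket map join planning, so those consumers now see the bucket count a partition was actually written with. A hedged illustration: if partition part='1' of srcbucket_mapjoin_part_1 (from the new bucketmapjoin tests) was loaded under a 2-bucket layout and the table was later altered to 3 buckets, bucket sampling against that partition should be driven by its own 2 buckets:

    -- sketch only: samples bucket 1 of the partition's 2 buckets, not the table's 3
    select * from srcbucket_mapjoin_part_1
    tablesample (bucket 1 out of 2 on key) s
    where s.part = '1';
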
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1385084&r1=1385083&r2=1385084&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sat Sep 15 15:26:43 2012
@@ -1324,24 +1324,6 @@ public class DDLSemanticAnalyzer extends
         throw new SemanticException(ErrorMsg.INVALID_BUCKET_NUMBER.getMsg());
       }
 
-      // If the table is partitioned, the number of buckets cannot be changed
-      // (unless the table is empty).
-      // The hive code uses bucket information from the table, and changing the
-      // number of buckets can lead to wrong results for bucketed join/sampling
-      // etc. This should be fixed as part of HIVE-3283.
-      // Once the above jira is fixed, this error check/message should be removed
-      if (tab.isPartitioned()) {
-        try {
-          List<String> partitionNames = db.getPartitionNames(tableName, (short)1);
-          if ((partitionNames != null) && (!partitionNames.isEmpty())) {
-            throw new
-              SemanticException(ErrorMsg.NUM_BUCKETS_CHANGE_NOT_ALLOWED.getMsg());
-          }
-        } catch (HiveException e) {
-          throw new SemanticException(e.getMessage());
-        }
-      }
-
       AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, numBuckets,
           bucketCols, sortCols);
       rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),

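One consequence worth noting: ALTER TABLE ... CLUSTERED BY now only changes the table-level default applied to partitions written afterward; existing partitions keep the bucket count recorded when they were created, as alter_numbuckets_partitioned_table.q.out below shows (the partition stays at Num Buckets: 8 while the table moves to 12):

    alter table tst1 clustered by (key) into 12 buckets;
    describe formatted tst1 partition (ds = '1'); -- still reports Num Buckets: 8
    describe formatted tst1;                      -- now reports Num Buckets: 12
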
Added: hive/trunk/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/alter_numbuckets_partitioned_table.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,45 @@
+
+create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets;
+
+alter table tst1 clustered by (key) into 8 buckets;
+
+describe formatted tst1;
+
+set hive.enforce.bucketing=true;
+insert overwrite table tst1 partition (ds='1') select key, value from src;
+
+describe formatted tst1 partition (ds = '1');
+
+-- Test changing bucket number
+
+alter table tst1 clustered by (key) into 12 buckets;
+
+insert overwrite table tst1 partition (ds='1') select key, value from src;
+
+describe formatted tst1 partition (ds = '1');
+
+describe formatted tst1;
+
+-- Test adding sort order
+
+alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets;
+
+describe formatted tst1;
+
+-- Test changing sort order
+
+alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets;
+
+describe formatted tst1;
+
+-- Test removing sort order
+
+alter table tst1 clustered by (value) into 12 buckets;
+
+describe formatted tst1;
+
+-- Test removing buckets
+
+alter table tst1 not clustered;
+
+describe formatted tst1;

Added: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin10.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin10.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin10.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,36 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+
+-- The table bucketing metadata matches but the partition metadata does not, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;

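For this test, part='1' of srcbucket_mapjoin_part_1 has 2 buckets while part='2' has 3, and srcbucket_mapjoin_part_2 is the mirror image. With bucket counts of 2 and 3 there is no consistent file-to-file mapping: a key with hash h lands in bucket h mod 2 on one side and h mod 3 on the other, and neither value determines the other, so the planner must fall back to a regular map join.
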
Added: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin11.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin11.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin11.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,47 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2');
+
+
+set hive.optimize.bucketmapjoin=true;
+
+-- The table and partition bucketing metadata do not match, but the bucket counts of all partitions are
+-- powers of 2 and the bucketing columns match, so bucket map join should be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL;
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL;

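The power-of-2 case works because the smaller bucket count divides the larger: with counts 2 and 4, a key with hash h satisfies h mod 2 = (h mod 4) mod 2, so bucket i of a 2-bucket partition corresponds exactly to buckets i and i+2 of a 4-bucket partition, and each small-side bucket file can be paired with a fixed pair of big-side files.
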
Added: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin12.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin12.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin12.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin12.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,44 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED;
+
+CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING)
+STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+
+-- The partition bucketing metadata matches, but one table is not bucketed, so bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+-- The table bucketing metadata matches, but one partition is not bucketed, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';

Added: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin8.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin8.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin8.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,39 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+
+-- The partition bucketing metadata matches, but the tables have different numbers of buckets, so bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS;
+
+-- The partition bucketing metadata matches, but the tables are bucketed on different columns, so bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';

Added: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin9.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin9.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin9.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,45 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1');
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+
+-- The table bucketing metadata matches but the partitions have different numbers of buckets, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1';
+
+ALTER TABLE srcbucket_mapjoin_part_2 DROP PARTITION (part='1');
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS;
+LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1');
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS;
+
+-- The table bucketing metadata matches but the partitions are bucketed on different columns, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';

Added: hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,26 @@
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src;
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 1 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src;
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (value DESC) INTO 1 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+-- The partition sorting metadata matches but the table metadata does not, so sorted merge join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';

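The same partition-over-table precedence applies to sort metadata: part='1' of srcbucket_mapjoin_part_2 was populated while the table was declared sorted by (key desc), so even though the table was later altered to sorted by (value desc), the partition's recorded sort order still permits the sorted merge join.
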
Added: hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,27 @@
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src;
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) SORTED BY (value DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src;
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+-- The table sorting metadata matches but the partition metadata does not, so sorted merge join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' AND b.part = '1';

Added: hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q Sat Sep 15 15:26:43 2012
@@ -0,0 +1,33 @@
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.exec.reducers.max = 1;
+
+CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='1') SELECT * FROM src;
+
+ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_1 PARTITION (part='2') SELECT * FROM src;
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src;
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (key DESC) INTO 2 BUCKETS;
+INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='2') SELECT * FROM src;
+
+ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value DESC) INTO 2 BUCKETS;
+
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+
+-- The table sorting metadata matches but the partition metadata does not, so sorted merge join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
+
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;

Added: hive/trunk/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table.q.out Sat Sep 15 15:26:43 2012
@@ -0,0 +1,449 @@
+PREHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tst1(key string, value string) partitioned by (ds string) clustered by (key) into 10 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tst1
+PREHOOK: query: alter table tst1 clustered by (key) into 8 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: alter table tst1 clustered by (key) into 8 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	8                   	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tst1@ds=1
+POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tst1@ds=1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1 partition (ds = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1 partition (ds = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[1]                 	 
+Database:           	default             	 
+Table:              	tst1                	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	numFiles            	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	8                   	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: -- Test changing bucket number
+
+alter table tst1 clustered by (key) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test changing bucket number
+
+alter table tst1 clustered by (key) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tst1@ds=1
+POSTHOOK: query: insert overwrite table tst1 partition (ds='1') select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tst1@ds=1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1 partition (ds = '1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1 partition (ds = '1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[1]                 	 
+Database:           	default             	 
+Table:              	tst1                	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+	numFiles            	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	8                   	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	12                  	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: -- Test adding sort order
+
+alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test adding sort order
+
+alter table tst1 clustered by (key) sorted by (key asc) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	12                  	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[Order(col:key, order:1)]	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: -- Test changing sort order
+
+alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test changing sort order
+
+alter table tst1 clustered by (key) sorted by (value desc) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	12                  	 
+Bucket Columns:     	[key]               	 
+Sort Columns:       	[Order(col:value, order:0)]	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: -- Test removing sort order
+
+alter table tst1 clustered by (value) into 12 buckets
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test removing sort order
+
+alter table tst1 clustered by (value) into 12 buckets
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	12                  	 
+Bucket Columns:     	[value]             	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   
+PREHOOK: query: -- Test removing buckets
+
+alter table tst1 not clustered
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@tst1
+PREHOOK: Output: default@tst1
+POSTHOOK: query: -- Test removing buckets
+
+alter table tst1 not clustered
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@tst1
+POSTHOOK: Output: default@tst1
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted tst1
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted tst1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tst1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+value               	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+ds                  	string              	None                
+	 	 
+# Detailed Table Information	 	 
+Database:           	default             	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+Retention:          	0                   	 
+#### A masked pattern was here ####
+Table Type:         	MANAGED_TABLE       	 
+Table Parameters:	 	 
+#### A masked pattern was here ####
+	numFiles            	1                   
+	numPartitions       	1                   
+	numRows             	500                 
+	rawDataSize         	5312                
+	totalSize           	5812                
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
+InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+Storage Desc Params:	 	 
+	serialization.format	1                   

Added: hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin10.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin10.q.out?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin10.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin10.q.out Sat Sep 15 15:26:43 2012
@@ -0,0 +1,375 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_1
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 3 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING) 
+CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
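At this point the table-level metadata for both tables reports 3 buckets, while each partition retains the bucket count that was in effect when it was loaded (for example, part='2' of srcbucket_mapjoin_part_2 was loaded while the table had 2 buckets). A quick way to confirm this, not part of the .q file itself, is DESCRIBE FORMATTED on the table and on an individual partition; the partition-level Num Buckets is the value the partition carries:

    -- Hypothetical verification, not run by this test:
    DESCRIBE FORMATTED srcbucket_mapjoin_part_2;                      -- Num Buckets: 3
    DESCRIBE FORMATTED srcbucket_mapjoin_part_2 PARTITION (part='2'); -- Num Buckets: 2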
+PREHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table bucketing metadata matches but the partition metadata does not, so bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            HashTable Sink Operator
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            GatherStats: false
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              Position of Big Table: 0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns 
+                      columns.types 
+                      escape.delim \
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+      Local Work:
+        Map Reduce Local Work
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: part=1
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              part 1
+            properties:
+              bucket_count 2
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.srcbucket_mapjoin_part_1
+              numFiles 2
+              numPartitions 2
+              numRows 0
+              partition_columns part
+              rawDataSize 0
+              serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 2750
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 3
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.srcbucket_mapjoin_part_1
+                numFiles 5
+                numPartitions 2
+                numRows 0
+                partition_columns part
+                rawDataSize 0
+                serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 6950
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcbucket_mapjoin_part_1
+            name: default.srcbucket_mapjoin_part_1
+#### A masked pattern was here ####
+          Partition
+            base file name: part=2
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              part 2
+            properties:
+              bucket_count 3
+              bucket_field_name key
+              columns key,value
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.srcbucket_mapjoin_part_1
+              numFiles 3
+              numPartitions 2
+              numRows 0
+              partition_columns part
+              rawDataSize 0
+              serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 4200
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 3
+                bucket_field_name key
+                columns key,value
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.srcbucket_mapjoin_part_1
+                numFiles 5
+                numPartitions 2
+                numRows 0
+                partition_columns part
+                rawDataSize 0
+                serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 6950
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcbucket_mapjoin_part_1
+            name: default.srcbucket_mapjoin_part_1
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          Select Operator
+            Select Operator
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                mode: hash
+                outputColumnNames: _col0
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: _col0
+                        type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns 
+              columns.types 
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns 
+                columns.types 
+                escape.delim \
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          mode: mergepartial
+          outputColumnNames: _col0
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: bigint
+            outputColumnNames: _col0
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0
+                    columns.types bigint
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
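The plan above is an ordinary hint-driven map join: Stage-4 builds a hash table from b (HashTable Sink Operator) and Stage-1 streams a through a Map Join Operator, with no per-bucket file mapping. Because the bucket counts recorded on the partitions disagree, buckets cannot be lined up across the two sides even though the table-level metadata matches. As a sketch, not part of this .q file, a bucket map join would normally be requested for this query as below; with incompatible partition bucketing the optimizer still falls back to the plain map join shown:

    -- Sketch, assuming default session settings otherwise:
    SET hive.optimize.bucketmapjoin = true;
    SELECT /*+ MAPJOIN(b) */ count(*)
    FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
    ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;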
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+2116
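As a sanity check (an assumption about intent, not something this test runs), the same count should come back from the unhinted shuffle join, since falling back from a bucket map join changes only the execution strategy, never the result:

    -- Hypothetical cross-check:
    SELECT count(*)
    FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
    ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL;
    -- expected: 2116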