You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/09/15 17:26:44 UTC
svn commit: r1385084 [2/3] - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/ java/org/apache/hadoop/hive/ql/metadata/
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientnegative/
test/queries/clientpositive/ test/results/clientnegative/ te...
Added: hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin11.q.out?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin11.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin11.q.out Sat Sep 15 15:26:43 2012
@@ -0,0 +1,664 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_1
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_1 CLUSTERED BY (key) INTO 4 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=2
+PREHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is
+-- a power of 2 and the bucketing columns match so bucket map join should be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is
+-- a power of 2 and the bucketing columns match so bucket map join should be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt, part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt, part=1/srcbucket23.txt, part=2/srcbucket21.txt], part=2/srcbucket20.txt=[part=1/srcbucket20.txt, part=2/srcbucket20.txt], part=2/srcbucket21.txt=[part=1/srcbucket21.txt, part=2/srcbucket21.txt], part=2/srcbucket22.txt=[part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=2/srcbucket23.txt=[part=1/srcbucket23.txt, part=2/srcbucket21.txt]}
+ Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+ Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 6
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8562
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+#### A masked pattern was here ####
+ Partition
+ base file name: part=2
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 2
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 4
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 6
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8562
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+2420
+PREHOOK: query: EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (AND (AND (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) (. (TOK_TABLE_OR_COL b) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL a) part))) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) part))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[part]]
+ 1 [Column[key], Column[part]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt, part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt, part=1/srcbucket23.txt, part=2/srcbucket21.txt], part=2/srcbucket20.txt=[part=1/srcbucket20.txt, part=2/srcbucket20.txt], part=2/srcbucket21.txt=[part=1/srcbucket21.txt, part=2/srcbucket21.txt], part=2/srcbucket22.txt=[part=1/srcbucket22.txt, part=2/srcbucket20.txt], part=2/srcbucket23.txt=[part=1/srcbucket23.txt, part=2/srcbucket21.txt]}
+ Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+ Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key], Column[part]]
+ 1 [Column[key], Column[part]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 6
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8562
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+#### A masked pattern was here ####
+ Partition
+ base file name: part=2
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 2
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 4
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 5812
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 4
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 6
+ numPartitions 2
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 8562
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = b.part AND a.part IS NOT NULL AND b.part IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=2
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=2
+#### A masked pattern was here ####
+928
Added: hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin12.q.out?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin12.q.out Sat Sep 15 15:26:43 2012
@@ -0,0 +1,525 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 NOT CLUSTERED
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING)
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_3 (key INT, value STRING) PARTITIONED BY (part STRING)
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_3
+POSTHOOK: Output: default@srcbucket_mapjoin_part_3@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_3@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_3@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_3
+PREHOOK: Output: default@srcbucket_mapjoin_part_3
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_3
+POSTHOOK: Output: default@srcbucket_mapjoin_part_3
+PREHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt]}
+ Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+ Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+464
+PREHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The table bucketing metadata match but one partition is not bucketed, bucket map join should not be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_3) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_3@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_3@part=1
+#### A masked pattern was here ####
+464
Added: hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin8.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin8.q.out?rev=1385084&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin8.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin8.q.out Sat Sep 15 15:26:43 2012
@@ -0,0 +1,512 @@
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_1 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key INT, value STRING) PARTITIONED BY (part STRING)
+CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+PREHOOK: type: LOAD
+PREHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='1')
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2@part=1
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt]}
+ Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+ Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+464
+PREHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS
+PREHOOK: type: ALTERTABLE_CLUSTER_SORT
+PREHOOK: Input: default@srcbucket_mapjoin_part_2
+PREHOOK: Output: default@srcbucket_mapjoin_part_2
+POSTHOOK: query: ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (value) INTO 2 BUCKETS
+POSTHOOK: type: ALTERTABLE_CLUSTER_SORT
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+POSTHOOK: query: -- The partition bucketing metadata match but the tables are bucketed on different columns, bucket map join should still be used
+
+EXPLAIN EXTENDED
+SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_1) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (and (AND (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) part) '1')) (= (. (TOK_TABLE_OR_COL b) part) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {part=1/srcbucket20.txt=[part=1/srcbucket20.txt], part=1/srcbucket21.txt=[part=1/srcbucket21.txt]}
+ Alias Bucket File Name Mapping:
+#### A masked pattern was here ####
+ Alias Bucket Output File Name Mapping:
+#### A masked pattern was here ####
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: part=1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ part 1
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.srcbucket_mapjoin_part_1
+ numFiles 2
+ numPartitions 1
+ numRows 0
+ partition_columns part
+ rawDataSize 0
+ serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2750
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.srcbucket_mapjoin_part_1
+ name: default.srcbucket_mapjoin_part_1
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ Select Operator
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ mode: hash
+ outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns
+ columns.types
+ escape.delim \
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ mode: mergepartial
+ outputColumnNames: _col0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: bigint
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*)
+FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_2 b
+ON a.key = b.key AND a.part = '1' and b.part = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_1@part=1
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@part=1
+#### A masked pattern was here ####
+464