You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/11/08 05:21:37 UTC
svn commit: r1406918 - in /hive/trunk/ql/src/test:
queries/clientpositive/smb_mapjoin_13.q
results/clientpositive/smb_mapjoin_13.q.out
Author: namit
Date: Thu Nov 8 04:21:36 2012
New Revision: 1406918
URL: http://svn.apache.org/viewvc?rev=1406918&view=rev
Log:
HIVE-3687 smb_mapjoin_13.q is nondeterministic
(Kevin Wilfong via namit)
Modified:
hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q
hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q?rev=1406918&r1=1406917&r2=1406918&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q Thu Nov 8 04:21:36 2012
@@ -24,13 +24,13 @@ INSERT OVERWRITE TABLE test_table4 SELEC
-- Join data from 2 tables on their respective sorted columns (one each, with different names) and
-- verify sort merge join is used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-- Join data from 2 tables on their respective columns (two each, with the same names but sorted
-- with different priorities) and verify sort merge join is not used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;
Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out?rev=1406918&r1=1406917&r2=1406918&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out Thu Nov 8 04:21:36 2012
@@ -57,12 +57,12 @@ POSTHOOK: Lineage: test_table4.value SIM
PREHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
-- verify sort merge join is used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
-- verify sort merge join is used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -73,10 +73,11 @@ POSTHOOK: Lineage: test_table3.value SIM
POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
@@ -99,46 +100,21 @@ STAGE PLANS:
1 [Column[value]]
outputColumnNames: _col0, _col1, _col4, _col5
Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- Limit
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types int:string:int:string
- escape.delim \
- serialization.format 1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Needs Tagging: false
Path -> Alias:
#### A masked pattern was here ####
@@ -192,17 +168,101 @@ STAGE PLANS:
Truncated Path -> Alias:
/test_table1 [a]
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ expr: _col3
+ type: string
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+ Reduce Operator Tree:
+ Extract
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+
Stage: Stage-0
Fetch Operator
limit: 10
-PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
PREHOOK: Input: default@test_table2
#### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1
POSTHOOK: Input: default@test_table2
@@ -224,16 +284,16 @@ POSTHOOK: Lineage: test_table4.value SIM
0 val_0 0 val_0
0 val_0 0 val_0
0 val_0 0 val_0
-64 val_64 64 val_64
+2 val_2 2 val_2
PREHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
-- with different priorities) and verify sort merge join is not used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
-- with different priorities) and verify sort merge join is not used
EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -244,15 +304,16 @@ POSTHOOK: Lineage: test_table3.value SIM
POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
- (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
STAGE DEPENDENCIES:
- Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-2 depends on stages: Stage-1
Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-4
Map Reduce Local Work
Alias -> Map Local Tables:
b
@@ -292,46 +353,21 @@ STAGE PLANS:
1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()]
outputColumnNames: _col0, _col1, _col4, _col5
Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col4, _col5
- Select Operator
- expressions:
- expr: _col0
- type: int
- expr: _col1
- type: string
- expr: _col4
- type: int
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2, _col3
- Limit
- File Output Operator
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns _col0,_col1,_col2,_col3
- columns.types int:string:int:string
- escape.delim \
- serialization.format 1
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Needs Tagging: false
@@ -387,17 +423,101 @@ STAGE PLANS:
Truncated Path -> Alias:
/test_table3 [a]
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col4
+ type: int
+ expr: _col5
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ sort order: +
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: int
+ expr: _col3
+ type: string
+ Needs Tagging: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col4,_col5
+ columns.types int,string,int,string
+ escape.delim \
+ Reduce Operator Tree:
+ Extract
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:int:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+
Stage: Stage-0
Fetch Operator
limit: 10
-PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table3
PREHOOK: Input: default@test_table4
#### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table3
POSTHOOK: Input: default@test_table4