You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/11/08 05:21:37 UTC

svn commit: r1406918 - in /hive/trunk/ql/src/test: queries/clientpositive/smb_mapjoin_13.q results/clientpositive/smb_mapjoin_13.q.out

Author: namit
Date: Thu Nov  8 04:21:36 2012
New Revision: 1406918

URL: http://svn.apache.org/viewvc?rev=1406918&view=rev
Log:
HIVE-3687 smb_mapjoin_13.q is nondeterministic
(Kevin Wilfong via namit)


Modified:
    hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out

Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q?rev=1406918&r1=1406917&r2=1406918&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_13.q Thu Nov  8 04:21:36 2012
@@ -24,13 +24,13 @@ INSERT OVERWRITE TABLE test_table4 SELEC
 -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
 -- verify sort merge join is used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
 
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10;
 
 -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
 -- with different priorities) and verify sort merge join is not used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;
 
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10;
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10;

Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out?rev=1406918&r1=1406917&r2=1406918&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out Thu Nov  8 04:21:36 2012
@@ -57,12 +57,12 @@ POSTHOOK: Lineage: test_table4.value SIM
 PREHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
 -- verify sort merge join is used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and
 -- verify sort merge join is used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -73,10 +73,11 @@ POSTHOOK: Lineage: test_table3.value SIM
 POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -99,46 +100,21 @@ STAGE PLANS:
                 1 [Column[value]]
               outputColumnNames: _col0, _col1, _col4, _col5
               Position of Big Table: 0
-              Select Operator
-                expressions:
-                      expr: _col0
-                      type: int
-                      expr: _col1
-                      type: string
-                      expr: _col4
-                      type: int
-                      expr: _col5
-                      type: string
-                outputColumnNames: _col0, _col1, _col4, _col5
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: int
-                        expr: _col1
-                        type: string
-                        expr: _col4
-                        type: int
-                        expr: _col5
-                        type: string
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Limit
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types int:string:int:string
-                            escape.delim \
-                            serialization.format 1
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col4,_col5
+                      columns.types int,string,int,string
+                      escape.delim \
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
 #### A masked pattern was here ####
@@ -192,17 +168,101 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /test_table1 [a]
 
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col4, _col5
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+                    expr: _col4
+                    type: int
+                    expr: _col5
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: int
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: int
+                      expr: _col3
+                      type: string
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col4,_col5
+              columns.types int,string,int,string
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col4,_col5
+                columns.types int,string,int,string
+                escape.delim \
+      Reduce Operator Tree:
+        Extract
+          Limit
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types int:string:int:string
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+
   Stage: Stage-0
     Fetch Operator
       limit: 10
 
 
-PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_table1
 PREHOOK: Input: default@test_table2
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value LIMIT 10
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_table1
 POSTHOOK: Input: default@test_table2
@@ -224,16 +284,16 @@ POSTHOOK: Lineage: test_table4.value SIM
 0	val_0	0	val_0
 0	val_0	0	val_0
 0	val_0	0	val_0
-64	val_64	64	val_64
+2	val_2	2	val_2
 PREHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
 -- with different priorities) and verify sort merge join is not used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted
 -- with different priorities) and verify sort merge join is not used
 EXPLAIN EXTENDED
-SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -244,15 +304,16 @@ POSTHOOK: Lineage: test_table3.value SIM
 POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 ABSTRACT SYNTAX TREE:
-  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 10)))
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table3) a) (TOK_TABREF (TOK_TABNAME test_table4) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))
 
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-1 depends on stages: Stage-3
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-3
+  Stage: Stage-4
     Map Reduce Local Work
       Alias -> Map Local Tables:
         b 
@@ -292,46 +353,21 @@ STAGE PLANS:
                 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[value]()]
               outputColumnNames: _col0, _col1, _col4, _col5
               Position of Big Table: 0
-              Select Operator
-                expressions:
-                      expr: _col0
-                      type: int
-                      expr: _col1
-                      type: string
-                      expr: _col4
-                      type: int
-                      expr: _col5
-                      type: string
-                outputColumnNames: _col0, _col1, _col4, _col5
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: int
-                        expr: _col1
-                        type: string
-                        expr: _col4
-                        type: int
-                        expr: _col5
-                        type: string
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Limit
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
-#### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            columns _col0,_col1,_col2,_col3
-                            columns.types int:string:int:string
-                            escape.delim \
-                            serialization.format 1
-                      TotalFiles: 1
-                      GatherStats: false
-                      MultiFileSpray: false
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      columns _col0,_col1,_col4,_col5
+                      columns.types int,string,int,string
+                      escape.delim \
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
       Needs Tagging: false
@@ -387,17 +423,101 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /test_table3 [a]
 
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: int
+                  expr: _col1
+                  type: string
+                  expr: _col4
+                  type: int
+                  expr: _col5
+                  type: string
+            outputColumnNames: _col0, _col1, _col4, _col5
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: int
+                    expr: _col1
+                    type: string
+                    expr: _col4
+                    type: int
+                    expr: _col5
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: int
+                sort order: +
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: int
+                      expr: _col3
+                      type: string
+      Needs Tagging: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              columns _col0,_col1,_col4,_col5
+              columns.types int,string,int,string
+              escape.delim \
+          
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                columns _col0,_col1,_col4,_col5
+                columns.types int,string,int,string
+                escape.delim \
+      Reduce Operator Tree:
+        Extract
+          Limit
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types int:string:int:string
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+
   Stage: Stage-0
     Fetch Operator
       limit: 10
 
 
-PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_table3
 PREHOOK: Input: default@test_table4
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value LIMIT 10
+POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_table3
 POSTHOOK: Input: default@test_table4