You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2013/05/28 02:54:56 UTC

svn commit: r1486721 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/physical/ test/queries/clientpositive/ test/results/clientpositive/

Author: navis
Date: Tue May 28 00:54:56 2013
New Revision: 1486721

URL: http://svn.apache.org/r1486721
Log:
HIVE-4540 JOIN-GRP BY-DISTINCT fails with NPE when mapjoin.mapreduce=true (Gunther Hagleitner via Navis)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q
    hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
    hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java?rev=1486721&r1=1486720&r2=1486721&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Tue May 28 00:54:56 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -364,6 +365,17 @@ public class CommonJoinTaskDispatcher ex
       return;
     }
 
+    // Remove the unnecessary TableScan: continue from its only child and detach the scan from it
+    if (childAliasOp instanceof TableScanOperator) {
+      TableScanOperator tso = (TableScanOperator)childAliasOp;
+      if (tso.getNumChild() != 1) {
+        // shouldn't happen: the TableScan is expected to have exactly one child; give up
+        return;
+      }
+      childAliasOp = tso.getChildOperators().get(0);
+      childAliasOp.getParentOperators().remove(tso);
+    }
+
     // Merge the 2 trees - remove the FileSinkOperator from the first tree pass it to the
     // top of the second
     Operator<? extends Serializable> parentFOp = mapJoinTaskFileSinkOperator

Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q?rev=1486721&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q Tue May 28 00:54:56 2013
@@ -0,0 +1,16 @@
+set hive.auto.convert.join=true;
+set hive.optimize.mapjoin.mapreduce=true;
+
+-- empty tables
+create table studenttab10k (name string, age int, gpa double);
+create table votertab10k (name string, age int, registration string, contributions float);
+
+explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;

Added: hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out?rev=1486721&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out Tue May 28 00:54:56 2013
@@ -0,0 +1,148 @@
+PREHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-4 depends on stages: Stage-5
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        s 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        s 
+          TableScan
+            alias: s
+            HashTable Sink Operator
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              Position of Big Table: 1
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        v 
+          TableScan
+            alias: v
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {name}
+                1 {registration}
+              handleSkewJoin: false
+              keys:
+                0 [Column[name]]
+                1 [Column[name]]
+              outputColumnNames: _col0, _col7
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col7
+                      type: string
+                outputColumnNames: _col0, _col7
+                Group By Operator
+                  aggregations:
+                        expr: count(DISTINCT _col7)
+                  bucketGroup: false
+                  keys:
+                        expr: _col0
+                        type: string
+                        expr: _col7
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Input: default@votertab10k
+#### A masked pattern was here ####

Modified: hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out?rev=1486721&r1=1486720&r2=1486721&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out Tue May 28 00:54:56 2013
@@ -598,18 +598,18 @@ STAGE PLANS:
                               type: string
                         mode: hash
                         outputColumnNames: _col0, _col1
-                          Reduce Output Operator
-                            key expressions:
-                                  expr: _col0
-                                  type: string
-                            sort order: +
-                            Map-reduce partition columns:
-                                  expr: _col0
-                                  type: string
-                            tag: -1
-                            value expressions:
-                                  expr: _col1
-                                  type: bigint
+                        Reduce Output Operator
+                          key expressions:
+                                expr: _col0
+                                type: string
+                          sort order: +
+                          Map-reduce partition columns:
+                                expr: _col0
+                                type: string
+                          tag: -1
+                          value expressions:
+                                expr: _col1
+                                type: bigint
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree:
@@ -1836,12 +1836,12 @@ STAGE PLANS:
                               bucketGroup: false
                               mode: hash
                               outputColumnNames: _col0
-                                Reduce Output Operator
-                                  sort order: 
-                                  tag: -1
-                                  value expressions:
-                                        expr: _col0
-                                        type: bigint
+                              Reduce Output Operator
+                                sort order: 
+                                tag: -1
+                                value expressions:
+                                      expr: _col0
+                                      type: bigint
       Local Work:
         Map Reduce Local Work
       Reduce Operator Tree: