You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2013/05/28 02:54:56 UTC
svn commit: r1486721 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/physical/
test/queries/clientpositive/ test/results/clientpositive/
Author: navis
Date: Tue May 28 00:54:56 2013
New Revision: 1486721
URL: http://svn.apache.org/r1486721
Log:
HIVE-4540 JOIN-GRP BY-DISTINCT fails with NPE when mapjoin.mapreduce=true (Gunther Hagleitner via Navis)
Added:
hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q
hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java?rev=1486721&r1=1486720&r2=1486721&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Tue May 28 00:54:56 2013
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -364,6 +365,17 @@ public class CommonJoinTaskDispatcher ex
return;
}
+ // remove the unnecessary TableScan: continue with its only child and unlink the TableScan from that child's parents
+ if (childAliasOp instanceof TableScanOperator) {
+ TableScanOperator tso = (TableScanOperator)childAliasOp;
+ if (tso.getNumChild() != 1) {
+ // shouldn't happen: the TableScan is expected to have exactly one child, so give up on the merge otherwise
+ return;
+ }
+ childAliasOp = tso.getChildOperators().get(0);
+ childAliasOp.getParentOperators().remove(tso);
+ }
+
// Merge the 2 trees - remove the FileSinkOperator from the first tree pass it to the
// top of the second
Operator<? extends Serializable> parentFOp = mapJoinTaskFileSinkOperator
Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q?rev=1486721&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/auto_join33.q Tue May 28 00:54:56 2013
@@ -0,0 +1,16 @@
+set hive.auto.convert.join=true;
+set hive.optimize.mapjoin.mapreduce=true;
+
+-- empty tables
+create table studenttab10k (name string, age int, gpa double);
+create table votertab10k (name string, age int, registration string, contributions float);
+
+explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
+
+select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name;
Added: hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out?rev=1486721&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_join33.q.out Tue May 28 00:54:56 2013
@@ -0,0 +1,148 @@
+PREHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- empty tables
+create table studenttab10k (name string, age int, gpa double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@studenttab10k
+PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@votertab10k
+PREHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-4 depends on stages: Stage-5
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ s
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ s
+ TableScan
+ alias: s
+ HashTable Sink Operator
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ Position of Big Table: 1
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ v
+ TableScan
+ alias: v
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {name}
+ 1 {registration}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[name]]
+ 1 [Column[name]]
+ outputColumnNames: _col0, _col7
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col7
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT _col7)
+ bucketGroup: false
+ keys:
+ expr: _col0
+ type: string
+ expr: _col7
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Local Work:
+ Map Reduce Local Work
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+PREHOOK: type: QUERY
+PREHOOK: Input: default@studenttab10k
+PREHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
+POSTHOOK: query: select s.name, count(distinct registration)
+from studenttab10k s join votertab10k v
+on (s.name = v.name)
+group by s.name
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@studenttab10k
+POSTHOOK: Input: default@votertab10k
+#### A masked pattern was here ####
Modified: hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out?rev=1486721&r1=1486720&r2=1486721&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/multiMapJoin1.q.out Tue May 28 00:54:56 2013
@@ -598,18 +598,18 @@ STAGE PLANS:
type: string
mode: hash
outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Local Work:
Map Reduce Local Work
Reduce Operator Tree:
@@ -1836,12 +1836,12 @@ STAGE PLANS:
bucketGroup: false
mode: hash
outputColumnNames: _col0
- Reduce Output Operator
- sort order:
- tag: -1
- value expressions:
- expr: _col0
- type: bigint
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: bigint
Local Work:
Map Reduce Local Work
Reduce Operator Tree: