You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/07/31 02:43:22 UTC
[22/43] hive git commit: HIVE-11271 :
java.lang.IndexOutOfBoundsException when union all with if function (Yongzhi
Chen via Szehon)
HIVE-11271 : java.lang.IndexOutOfBoundsException when union all with if function (Yongzhi Chen via Szehon)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c2dc452e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c2dc452e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c2dc452e
Branch: refs/heads/spark
Commit: c2dc452ece2d625a974a189d64cc42784597c5f6
Parents: 9f47426
Author: Szehon Ho <sz...@cloudera.com>
Authored: Mon Jul 27 11:34:12 2015 -0700
Committer: Szehon Ho <sz...@cloudera.com>
Committed: Mon Jul 27 11:34:12 2015 -0700
----------------------------------------------------------------------
.../hive/ql/optimizer/ColumnPrunerProcCtx.java | 64 ++++
.../ql/optimizer/ColumnPrunerProcFactory.java | 2 +-
.../clientpositive/unionall_unbalancedppd.q | 120 ++++++
.../clientpositive/unionall_unbalancedppd.q.out | 373 +++++++++++++++++++
4 files changed, 558 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c2dc452e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index c076d4e..8bcb464 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -25,7 +25,9 @@ import java.util.Map;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
@@ -33,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
@@ -241,4 +244,65 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
}
return columns;
}
+
+ /**
+ * If the input filter operator has direct child(ren) which are union operator,
+ * and the filter's column is not the same as union's
+ * create select operator between them. The select operator has same number of columns as
+ * pruned child operator.
+ *
+ * @param curOp
+ * The filter operator which need to handle children.
+ * @throws SemanticException
+ */
+ public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp)
+ throws SemanticException {
+ if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
+ return;
+ }
+ List<String> parentPrunList = prunedColLists.get(curOp);
+ if(parentPrunList == null || parentPrunList.size() == 0) {
+ return;
+ }
+ FilterOperator filOp = (FilterOperator)curOp;
+ List<String> prunList = null;
+ List<Integer>[] childToParentIndex = null;
+
+ for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
+ if (child instanceof UnionOperator) {
+ prunList = prunedColLists.get(child);
+ if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
+ continue;
+ }
+
+ ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+ ArrayList<String> outputColNames = new ArrayList<String>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
+ for (ColumnInfo colInfo : child.getSchema().getSignature()) {
+ if (!prunList.contains(colInfo.getInternalName())) {
+ continue;
+ }
+ ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
+ colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+ exprs.add(colDesc);
+ outputColNames.add(colInfo.getInternalName());
+ ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
+ colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+ newCol.setAlias(colInfo.getAlias());
+ outputRS.add(newCol);
+ colExprMap.put(colInfo.getInternalName(), colDesc);
+ }
+ SelectDesc select = new SelectDesc(exprs, outputColNames, false);
+ curOp.removeChild(child);
+ SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
+ select, new RowSchema(outputRS), curOp);
+ OperatorFactory.makeChild(sel, child);
+ sel.setColumnExprMap(colExprMap);
+
+ }
+
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2dc452e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index ac4236c..2dc15f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -108,7 +108,7 @@ public final class ColumnPrunerProcFactory {
filterOpPrunedColListsOrderPreserved);
pruneOperator(cppCtx, op, cppCtx.getPrunedColLists().get(op));
-
+ cppCtx.handleFilterUnionChildren(op);
return null;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/c2dc452e/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
new file mode 100644
index 0000000..0825c2d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/unionall_unbalancedppd.q
@@ -0,0 +1,120 @@
+set hive.optimize.ppd=true;
+drop table if exists union_all_bug_test_1;
+drop table if exists union_all_bug_test_2;
+create table if not exists union_all_bug_test_1
+(
+f1 int,
+f2 int
+);
+
+create table if not exists union_all_bug_test_2
+(
+f1 int
+);
+
+explain SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1);
+
+insert into table union_all_bug_test_1 values (1,1);
+insert into table union_all_bug_test_2 values (1);
+insert into table union_all_bug_test_1 values (0,0);
+insert into table union_all_bug_test_2 values (0);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 0);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 or filter = 0);
+
+SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1);
http://git-wip-us.apache.org/repos/asf/hive/blob/c2dc452e/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
new file mode 100644
index 0000000..46828e9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/unionall_unbalancedppd.q.out
@@ -0,0 +1,373 @@
+PREHOOK: query: drop table if exists union_all_bug_test_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists union_all_bug_test_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists union_all_bug_test_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists union_all_bug_test_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists union_all_bug_test_1
+(
+f1 int,
+f2 int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_all_bug_test_1
+POSTHOOK: query: create table if not exists union_all_bug_test_1
+(
+f1 int,
+f2 int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_all_bug_test_1
+PREHOOK: query: create table if not exists union_all_bug_test_2
+(
+f1 int
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_all_bug_test_2
+POSTHOOK: query: create table if not exists union_all_bug_test_2
+(
+f1 int
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_all_bug_test_2
+PREHOOK: query: explain SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: union_all_bug_test_1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (if(true, f1, f2) = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: f1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Union
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: union_all_bug_test_2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: false (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: f1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Union
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+PREHOOK: query: insert into table union_all_bug_test_1 values (1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@union_all_bug_test_1
+POSTHOOK: query: insert into table union_all_bug_test_1 values (1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@union_all_bug_test_1
+POSTHOOK: Lineage: union_all_bug_test_1.f1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: union_all_bug_test_1.f2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table union_all_bug_test_2 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@union_all_bug_test_2
+POSTHOOK: query: insert into table union_all_bug_test_2 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@union_all_bug_test_2
+POSTHOOK: Lineage: union_all_bug_test_2.f1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table union_all_bug_test_1 values (0,0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@union_all_bug_test_1
+POSTHOOK: query: insert into table union_all_bug_test_1 values (0,0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@union_all_bug_test_1
+POSTHOOK: Lineage: union_all_bug_test_1.f1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: union_all_bug_test_1.f2 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table union_all_bug_test_2 values (0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@union_all_bug_test_2
+POSTHOOK: query: insert into table union_all_bug_test_2 values (0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@union_all_bug_test_2
+POSTHOOK: Lineage: union_all_bug_test_2.f1 EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+0
+1
+0
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 or filter = 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (filter = 1 or filter = 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
+0
+1
+0
+PREHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_all_bug_test_1
+PREHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT f1
+FROM (
+
+SELECT
+f1
+, if('helloworld' like '%hello%' ,f1,f2) as filter
+FROM union_all_bug_test_1
+
+union all
+
+select
+f1
+, 0 as filter
+from union_all_bug_test_2
+) A
+WHERE (f1 = 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_all_bug_test_1
+POSTHOOK: Input: default@union_all_bug_test_2
+#### A masked pattern was here ####
+1
+1