You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/11/17 17:05:50 UTC
[10/18] hive git commit: HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)
HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f18849b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f18849b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f18849b1
Branch: refs/heads/spark
Commit: f18849b117b93670600da58e0914774f9a575425
Parents: 0804ddb
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Nov 12 11:28:33 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Nov 13 10:14:27 2015 -0800
----------------------------------------------------------------------
.../BucketingSortingReduceSinkOptimizer.java | 3 +
.../clientpositive/insertoverwrite_bucket.q | 12 +++
.../clientpositive/insertoverwrite_bucket.q.out | 87 ++++++++++++++++++++
3 files changed, 102 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
index d5df34c..f59ab4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
@@ -602,6 +602,9 @@ public class BucketingSortingReduceSinkOptimizer implements Transform {
}
for (int pos : sortPositions) {
+ if (pos >= selectDesc.getColList().size()) {
+ return null;
+ }
ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
if (!(selectColList instanceof ExprNodeColumnDesc)) {
return null;
http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
index 5a10f94..50f9361 100644
--- a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
+++ b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
@@ -25,12 +25,24 @@ CREATE TABLE temp1
)
CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS;
explain insert overwrite table temp1 select data, data from bucketinput;
+CREATE TABLE temp2
+(
+ create_ts STRING ,
+ change STRING,
+ num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS;
+explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data);
drop table temp1;
+drop table temp2;
drop table buckettestinput;
drop table buckettestoutput1;
drop table buckettestoutput2;
http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
index 4add20c..bfbe87b 100644
--- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
+++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
@@ -150,6 +150,85 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+PREHOOK: query: CREATE TABLE temp2
+(
+ create_ts STRING ,
+ change STRING,
+ num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@temp2
+POSTHOOK: query: CREATE TABLE temp2
+(
+ create_ts STRING ,
+ change STRING,
+ num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@temp2
+PREHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: temp1
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: change (type: string), num (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp2
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketoutput1
@@ -168,6 +247,14 @@ POSTHOOK: query: drop table temp1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@temp1
POSTHOOK: Output: default@temp1
+PREHOOK: query: drop table temp2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp2
+PREHOOK: Output: default@temp2
+POSTHOOK: query: drop table temp2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp2
+POSTHOOK: Output: default@temp2
PREHOOK: query: drop table buckettestinput
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table buckettestinput