Posted to commits@hive.apache.org by xu...@apache.org on 2015/11/12 04:59:57 UTC
[33/55] [abbrv] hive git commit: HIVE-12232 : BucketingSortingReduceSinkOptimizer throws IOB exception for duplicate columns

HIVE-12232 : BucketingSortingReduceSinkOptimizer throws IOB exception for duplicate columns
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
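
The query shape that triggered the IndexOutOfBoundsException is the one exercised by the new test case below; pulled out of insertoverwrite_bucket.q here for quick reference (table names are the test's own):

    -- temp1 is bucketed and sorted on its second column, and the SELECT
    -- projects the same source column twice.
    CREATE TABLE temp1 (change string, num string)
    CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS;

    -- Before this fix, planning the following statement hit the IOB exception
    -- in BucketingSortingReduceSinkOptimizer; with the fix, the optimizer
    -- bails out and a plain map-reduce sort is planned instead.
    EXPLAIN INSERT OVERWRITE TABLE temp1 SELECT data, data FROM bucketinput;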
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1efb92a8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1efb92a8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1efb92a8
Branch: refs/heads/spark
Commit: 1efb92a8e5ba5aa662fc26a8bcaa5c972c18f7c6
Parents: ab7794c
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Nov 5 18:25:24 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Sun Nov 8 17:03:54 2015 -0800
----------------------------------------------------------------------
.../BucketingSortingReduceSinkOptimizer.java | 6 ++
.../clientpositive/insertoverwrite_bucket.q | 9 +++
.../clientpositive/insertoverwrite_bucket.q.out | 78 ++++++++++++++++++++
3 files changed, 93 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1efb92a8/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
index a090a5b..d5df34c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
@@ -588,6 +588,12 @@ public class BucketingSortingReduceSinkOptimizer implements Transform {
}
// Only columns can be selected for both sorted and bucketed positions
for (int pos : bucketPositions) {
+ if (pos >= selectDesc.getColList().size()) {
+ // e.g., INSERT OVERWRITE TABLE temp1 SELECT c0, c0 FROM temp2;
+ // In such a case the Select operator has only one instance of c0 while the RS has two,
+ // so looking up the bucket column by position would go out of bounds. Bail out.
+ return null;
+ }
ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
if (!(selectColList instanceof ExprNodeColumnDesc)) {
return null;
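
The guard above is easiest to see in isolation. A minimal standalone Java sketch (illustrative names only, not Hive's actual classes) of the position mismatch it protects against:

    import java.util.Arrays;
    import java.util.List;

    public class DuplicateColumnGuardDemo {
        public static void main(String[] args) {
            // For SELECT c0, c0 the Select operator keeps a single instance
            // of c0 in its column list ...
            List<String> selectColList = Arrays.asList("c0");
            // ... while the ReduceSink still records two bucket positions.
            int[] bucketPositions = {0, 1};

            for (int pos : bucketPositions) {
                if (pos >= selectColList.size()) {
                    // Without this check, selectColList.get(1) would throw
                    // IndexOutOfBoundsException; the fix bails out instead.
                    System.out.println("pos " + pos + " out of bounds; bailing out");
                    return;
                }
                System.out.println("bucket column at pos " + pos + ": "
                    + selectColList.get(pos));
            }
        }
    }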
http://git-wip-us.apache.org/repos/asf/hive/blob/1efb92a8/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
index d939710..5a10f94 100644
--- a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
+++ b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
@@ -18,10 +18,19 @@ insert into table bucketinput values ("firstinsert3");
set hive.enforce.bucketing = true;
set hive.enforce.sorting=true;
insert overwrite table bucketoutput1 select * from bucketinput where data like 'first%';
+CREATE TABLE temp1
+(
+ change string,
+ num string
+)
+CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS;
+explain insert overwrite table temp1 select data, data from bucketinput;
+
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data);
+drop table temp1;
drop table buckettestinput;
drop table buckettestoutput1;
drop table buckettestoutput2;
http://git-wip-us.apache.org/repos/asf/hive/blob/1efb92a8/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
index 9b7b85d..4add20c 100644
--- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
+++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
@@ -80,6 +80,76 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketinput
POSTHOOK: Output: default@bucketoutput1
POSTHOOK: Lineage: bucketoutput1.data SIMPLE [(bucketinput)bucketinput.FieldSchema(name:data, type:string, comment:null), ]
+PREHOOK: query: CREATE TABLE temp1
+(
+ change string,
+ num string
+)
+CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@temp1
+POSTHOOK: query: CREATE TABLE temp1
+(
+ change string,
+ num string
+)
+CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@temp1
+PREHOOK: query: explain insert overwrite table temp1 select data, data from bucketinput
+PREHOOK: type: QUERY
+POSTHOOK: query: explain insert overwrite table temp1 select data, data from bucketinput
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: bucketinput
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: data (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.temp1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
PREHOOK: type: QUERY
PREHOOK: Input: default@bucketoutput1
@@ -90,6 +160,14 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucketoutput1
POSTHOOK: Input: default@bucketoutput2
#### A masked pattern was here ####
+PREHOOK: query: drop table temp1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp1
+PREHOOK: Output: default@temp1
+POSTHOOK: query: drop table temp1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp1
+POSTHOOK: Output: default@temp1
PREHOOK: query: drop table buckettestinput
PREHOOK: type: DROPTABLE
POSTHOOK: query: drop table buckettestinput