You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/11/17 17:05:50 UTC

[10/18] hive git commit: HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)

HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f18849b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f18849b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f18849b1

Branch: refs/heads/spark
Commit: f18849b117b93670600da58e0914774f9a575425
Parents: 0804ddb
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Thu Nov 12 11:28:33 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Nov 13 10:14:27 2015 -0800

----------------------------------------------------------------------
 .../BucketingSortingReduceSinkOptimizer.java    |  3 +
 .../clientpositive/insertoverwrite_bucket.q     | 12 +++
 .../clientpositive/insertoverwrite_bucket.q.out | 87 ++++++++++++++++++++
 3 files changed, 102 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
index d5df34c..f59ab4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
@@ -602,6 +602,9 @@ public class BucketingSortingReduceSinkOptimizer implements Transform {
             }
 
             for (int pos : sortPositions) {
+              if (pos >= selectDesc.getColList().size()) {
+                return null;
+              }
               ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
               if (!(selectColList instanceof ExprNodeColumnDesc)) {
                 return null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
index 5a10f94..50f9361 100644
--- a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
+++ b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
@@ -25,12 +25,24 @@ CREATE TABLE temp1
 )
 CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS;
 explain insert overwrite table temp1 select data, data from bucketinput;
+CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS;
 
+explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1;
 set hive.auto.convert.sortmerge.join=true; 
 set hive.optimize.bucketmapjoin = true; 
 set hive.optimize.bucketmapjoin.sortedmerge = true; 
 select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data);
 drop table temp1;
+drop table temp2;
 drop table buckettestinput;
 drop table buckettestoutput1;
 drop table buckettestoutput2;

http://git-wip-us.apache.org/repos/asf/hive/blob/f18849b1/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
index 4add20c..bfbe87b 100644
--- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
+++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
@@ -150,6 +150,85 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+PREHOOK: query: CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@temp2
+POSTHOOK: query: CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@temp2
+PREHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: temp1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: change (type: string), num (type: string)
+              outputColumnNames: _col0, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.temp2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.temp2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucketoutput1
@@ -168,6 +247,14 @@ POSTHOOK: query: drop table temp1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@temp1
 POSTHOOK: Output: default@temp1
+PREHOOK: query: drop table temp2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp2
+PREHOOK: Output: default@temp2
+POSTHOOK: query: drop table temp2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp2
+POSTHOOK: Output: default@temp2
 PREHOOK: query: drop table buckettestinput
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table buckettestinput