You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2017/12/12 20:51:35 UTC
[12/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to
run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
HIVE-18208: SMB Join : Fix the unit tests to run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/38405c14
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/38405c14
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/38405c14
Branch: refs/heads/master
Commit: 38405c1458cf2c6ee508fedf38581df1fc8c1f61
Parents: 1320d2b
Author: Jason Dere <jd...@hortonworks.com>
Authored: Tue Dec 12 12:50:51 2017 -0800
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Tue Dec 12 12:50:51 2017 -0800
----------------------------------------------------------------------
.../clientpositive/auto_sortmerge_join_1.q | 2 +
.../clientpositive/auto_sortmerge_join_10.q | 3 +
.../clientpositive/auto_sortmerge_join_11.q | 8 +-
.../clientpositive/auto_sortmerge_join_12.q | 2 +
.../clientpositive/auto_sortmerge_join_13.q | 2 +-
.../clientpositive/auto_sortmerge_join_14.q | 2 +
.../clientpositive/auto_sortmerge_join_15.q | 2 +
.../clientpositive/auto_sortmerge_join_2.q | 2 +
.../clientpositive/auto_sortmerge_join_3.q | 2 +
.../clientpositive/auto_sortmerge_join_4.q | 2 +
.../clientpositive/auto_sortmerge_join_7.q | 2 +
.../clientpositive/auto_sortmerge_join_8.q | 2 +
.../clientpositive/auto_sortmerge_join_9.q | 3 +
.../bucketsortoptimize_insert_2.q | 2 +
.../bucketsortoptimize_insert_6.q | 1 +
.../bucketsortoptimize_insert_7.q | 2 +
.../test/queries/clientpositive/quotedid_smb.q | 6 +
ql/src/test/queries/clientpositive/smb_cache.q | 11 +-
.../clientpositive/auto_sortmerge_join_10.q.out | 331 +++-
.../clientpositive/auto_sortmerge_join_11.q.out | 1043 ++++++----
.../clientpositive/auto_sortmerge_join_12.q.out | 1790 +++++++++++++++++-
.../llap/auto_sortmerge_join_1.q.out | 329 ++--
.../llap/auto_sortmerge_join_10.q.out | 113 +-
.../llap/auto_sortmerge_join_11.q.out | 220 +--
.../llap/auto_sortmerge_join_12.q.out | 40 +-
.../llap/auto_sortmerge_join_13.q.out | 37 +-
.../llap/auto_sortmerge_join_14.q.out | 46 +-
.../llap/auto_sortmerge_join_15.q.out | 46 +-
.../llap/auto_sortmerge_join_2.q.out | 306 ++-
.../llap/auto_sortmerge_join_3.q.out | 329 ++--
.../llap/auto_sortmerge_join_4.q.out | 329 ++--
.../llap/auto_sortmerge_join_7.q.out | 352 ++--
.../llap/auto_sortmerge_join_8.q.out | 352 ++--
.../llap/auto_sortmerge_join_9.q.out | 1110 +++++------
.../llap/bucketsortoptimize_insert_2.q.out | 306 ++-
.../llap/bucketsortoptimize_insert_6.q.out | 273 +--
.../llap/bucketsortoptimize_insert_7.q.out | 117 +-
.../clientpositive/llap/quotedid_smb.q.out | 81 +-
.../results/clientpositive/llap/smb_cache.q.out | 191 +-
.../spark/auto_sortmerge_join_1.q.out | 99 +-
.../spark/auto_sortmerge_join_12.q.out | 252 +--
.../spark/auto_sortmerge_join_14.q.out | 64 +-
.../spark/auto_sortmerge_join_15.q.out | 64 +-
.../spark/auto_sortmerge_join_2.q.out | 99 +-
.../spark/auto_sortmerge_join_3.q.out | 149 +-
.../spark/auto_sortmerge_join_4.q.out | 149 +-
.../spark/auto_sortmerge_join_7.q.out | 149 +-
.../spark/auto_sortmerge_join_8.q.out | 149 +-
.../spark/auto_sortmerge_join_9.q.out | 736 ++-----
.../spark/bucketsortoptimize_insert_2.q.out | 240 +--
.../clientpositive/spark/quotedid_smb.q.out | 51 +
51 files changed, 4974 insertions(+), 5024 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
index a1d5249..a044c6d 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_1.q
@@ -26,6 +26,8 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
index e65344d..abd3a34 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
@@ -20,6 +20,9 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=1;
+
-- One of the subqueries contains a union, so it should not be converted to a sort-merge join.
explain
select count(*) from
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
index 11499f8..1393351 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q
@@ -3,11 +3,11 @@ set hive.strict.checks.bucketing=false;
set hive.mapred.mode=nonstrict;
-- small 1 part, 2 bucket & big 2 part, 4 bucket
-CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (KEY) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08');
-CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY(KEY) INTO 4 BUCKETS STORED AS TEXTFILE;
load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08');
@@ -19,14 +19,14 @@ load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bu
load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09');
set hive.auto.convert.join=true;
-
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
-
-- Since size is being used to find the big table, the order of the tables in the join does not matter
-- The tables are only bucketed and not sorted, the join should not be converted
-- Currently, a join is only converted to a sort-merge join without a hint, automatic conversion to
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
index b512cc5..cfaef76 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_12.q
@@ -23,6 +23,8 @@ set hive.auto.convert.join=true;
set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
CREATE TABLE bucket_medium (key string, value string) partitioned by (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
index b5706be..32b57ab 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q
@@ -22,6 +22,7 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
-- A SMB join followed by a multi-insert
+set hive.auto.convert.join.noconditionaltask.size=200;
explain
from (
SELECT a.key key1, a.value value1, b.key key2, b.value value2
@@ -41,7 +42,6 @@ select * from dest1;
select * from dest2;
set hive.auto.convert.join.noconditionaltask=true;
-set hive.auto.convert.join.noconditionaltask.size=200;
set hive.mapjoin.hybridgrace.minwbsize=100;
set hive.mapjoin.hybridgrace.minnumpartitions=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
index dd59c74..7ce7e05 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_14.q
@@ -17,6 +17,8 @@ set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since tbl1 is the bigger table, tbl1 Left Outer Join tbl2 can be performed
explain
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
index 1480b15..84a8ed7 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_15.q
@@ -17,6 +17,8 @@ set hive.auto.convert.sortmerge.join=true;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.join=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
explain
select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
index e77d937..122bba5 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_2.q
@@ -21,6 +21,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since the leftmost table is assumed as the big table, arrange the tables in the join accordingly
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
index 183f033..fa6e0af 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_3.q
@@ -21,6 +21,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=100;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
index 21f273a..3540779 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_4.q
@@ -23,6 +23,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=200;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
index cf12331..5a16d8c 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_7.q
@@ -26,6 +26,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
index 5ec4e26..4ae7b1d 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_8.q
@@ -29,6 +29,8 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+--disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
index f95631f..08dbf6c 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_9.q
@@ -20,6 +20,9 @@ set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
+--disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
+
-- The join is being performed as part of sub-query. It should be converted to a sort-merge join
explain
select count(*) from (
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
index 4a14587..c812cf1 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_2.q
@@ -11,6 +11,8 @@ set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
index ec0c2dc..894ba81 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_6.q
@@ -28,6 +28,7 @@ INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT key, key+1, value
-- Insert data into the bucketed table by selecting from another bucketed table
-- This should be a map-only operation, since the sort-order matches
+set hive.auto.convert.join.noconditionaltask.size=800;
EXPLAIN
INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.key2, concat(a.value, b.value)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
index 45635c1..a68c0b4 100644
--- a/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
+++ b/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_7.q
@@ -11,6 +11,8 @@ set hive.merge.mapredfiles=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy=org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSelectorForAutoSMJ;
set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=10;
-- Create two bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) PARTITIONED BY (ds STRING)
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/quotedid_smb.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/quotedid_smb.q b/ql/src/test/queries/clientpositive/quotedid_smb.q
index 25d1f0e..11642f5 100644
--- a/ql/src/test/queries/clientpositive/quotedid_smb.q
+++ b/ql/src/test/queries/clientpositive/quotedid_smb.q
@@ -27,7 +27,13 @@ set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+-- disable hash joins
+set hive.auto.convert.join.noconditionaltask.size=1000;
+explain select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
+from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
+where a.`x+1` < '11'
+;
select a.`x+1`, a.`!@#$%^&*()_q`, b.`x+1`, b.`!@#$%^&*()_q`
from src_b a join src_b2 b on a.`!@#$%^&*()_q` = b.`!@#$%^&*()_q`
where a.`x+1` < '11'
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/queries/clientpositive/smb_cache.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_cache.q b/ql/src/test/queries/clientpositive/smb_cache.q
index e415e51..67a9242 100644
--- a/ql/src/test/queries/clientpositive/smb_cache.q
+++ b/ql/src/test/queries/clientpositive/smb_cache.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
create table bug_201_input_a (
userid int
) clustered by (userid) sorted by (userid) into 64 BUCKETS ;
@@ -86,6 +86,7 @@ select distinct(userid) as userid from (
162031843 ,141532840 ,154222699 ,109320121 ,155198842
)) as arr )a ) b;
+
explain
select
t1.userid,
@@ -97,12 +98,12 @@ select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid) ;
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid;
set hive.auto.convert.join=true;
-set hive.auto.convert.join.noconditionaltask.size=100 ;
-set hive.auto.convert.sortmerge.join=true
+set hive.auto.convert.join.noconditionaltask.size=5;
+set hive.auto.convert.sortmerge.join=true;
set hive.convert.join.bucket.mapjoin.tez = true;
set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
@@ -117,5 +118,5 @@ select
t1.userid,
fa.userid as fa_userid
from bug_201_input_b as t1
- join bug_201_input_a as fa on (t1.userid = fa.userid) ;
+ join bug_201_input_a as fa on (t1.userid = fa.userid) order by t1.userid, fa.userid;
http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
index 22ac2a2..3a90b05 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_10.q.out
@@ -63,13 +63,20 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-5 depends on stages: Stage-6
- Stage-2 depends on stages: Stage-5
+ Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-1
+ Stage-8 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-8
+ Stage-2 depends on stages: Stage-1, Stage-5, Stage-6
+ Stage-9 has a backup stage: Stage-1
+ Stage-6 depends on stages: Stage-9
+ Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-8
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:a
@@ -180,6 +187,157 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-9
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0-subquery1:$hdt$_0-subquery1:a
+ Fetch Operator
+ limit: -1
+ $hdt$_0-subquery2:$hdt$_0-subquery2:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0-subquery1:$hdt$_0-subquery1:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ $hdt$_0-subquery2:$hdt$_0-subquery2:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -230,36 +388,43 @@ select count(*) from
on subq1.key = subq2.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-2 depends on stages: Stage-5
+ Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
+ Stage-7 has a backup stage: Stage-1
+ Stage-4 depends on stages: Stage-7
+ Stage-2 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-8 has a backup stage: Stage-1
+ Stage-5 depends on stages: Stage-8
+ Stage-1
Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
- $hdt$_0:a
+ $hdt$_1:a
Fetch Operator
limit: -1
Alias -> Map Local Operator Tree:
- $hdt$_0:a
+ $hdt$_1:a
TableScan
alias: a
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
HashTable Sink Operator
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Stage: Stage-2
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
@@ -268,10 +433,11 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ keys: key (type: int)
+ mode: final
outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -284,12 +450,23 @@ STAGE PLANS:
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint)
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -304,6 +481,118 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:a
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: a
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key < 6) (type: boolean)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1