You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/10/12 19:51:12 UTC
[04/10] hive git commit: HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)
HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/04febfd6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/04febfd6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/04febfd6
Branch: refs/heads/llap
Commit: 04febfd625286058066c1b57b62d278b7fb51d6f
Parents: cc2adc7
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Oct 9 10:39:50 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Oct 9 10:39:50 2015 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../clientpositive/skewjoin_onesideskew.q | 22 ++
.../clientpositive/skewjoin_onesideskew.q.out | 212 +++++++++++++++++++
3 files changed, 235 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9c9f4cc..ad47fac 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -42,6 +42,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
schemeAuthority2.q,\
scriptfile1.q,\
scriptfile1_win.q,\
+ skewjoin_onesideskew.q,\
stats_counter.q,\
stats_counter_partitioned.q,\
table_nonprintable.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
new file mode 100644
index 0000000..371f05c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
@@ -0,0 +1,22 @@
+set hive.auto.convert.join=false;
+set hive.optimize.skewjoin=true;
+set hive.skewjoin.key=2;
+
+
+DROP TABLE IF EXISTS skewtable;
+CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+
+DROP TABLE IF EXISTS nonskewtable;
+CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE nonskewtable VALUES ("1", "val_1");
+INSERT INTO TABLE nonskewtable VALUES ("2", "val_2");
+
+EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+
+SELECT * FROM result;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
new file mode 100644
index 0000000..f8cde9b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
@@ -0,0 +1,212 @@
+PREHOOK: query: DROP TABLE IF EXISTS skewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skewtable
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: DROP TABLE IF EXISTS nonskewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS nonskewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nonskewtable
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0
+ Stage-6
+ Stage-4 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-7
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ handleSkewJoin: true
+ keys:
+ 0 key (type: string)
+ 1 key (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+
+ Stage: Stage-5
+ Conditional Operator
+
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ 1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ 1
+ TableScan
+ HashTable Sink Operator
+ keys:
+ 0 reducesinkkey0 (type: string)
+ 1 reducesinkkey0 (type: string)
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 reducesinkkey0 (type: string)
+ 1 reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Create Table Operator:
+ Create Table
+ columns: key string, value string
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.result
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@nonskewtable
+PREHOOK: Input: default@skewtable
+PREHOOK: Output: database:default
+PREHOOK: Output: default@result
+POSTHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@nonskewtable
+POSTHOOK: Input: default@skewtable
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@result
+PREHOOK: query: SELECT * FROM result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@result
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@result
+#### A masked pattern was here ####