You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2015/10/09 19:40:45 UTC

hive git commit: HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/master cc2adc732 -> 04febfd62


HIVE-12032: Add unit test for HIVE-9855 (Wei Zheng via Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/04febfd6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/04febfd6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/04febfd6

Branch: refs/heads/master
Commit: 04febfd625286058066c1b57b62d278b7fb51d6f
Parents: cc2adc7
Author: Jason Dere <jd...@hortonworks.com>
Authored: Fri Oct 9 10:39:50 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Fri Oct 9 10:39:50 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../clientpositive/skewjoin_onesideskew.q       |  22 ++
 .../clientpositive/skewjoin_onesideskew.q.out   | 212 +++++++++++++++++++
 3 files changed, 235 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9c9f4cc..ad47fac 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -42,6 +42,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
   schemeAuthority2.q,\
   scriptfile1.q,\
   scriptfile1_win.q,\
+  skewjoin_onesideskew.q,\
   stats_counter.q,\
   stats_counter_partitioned.q,\
   table_nonprintable.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
new file mode 100644
index 0000000..371f05c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skewjoin_onesideskew.q
@@ -0,0 +1,22 @@
+set hive.auto.convert.join=false;
+set hive.optimize.skewjoin=true;
+set hive.skewjoin.key=2;
+
+
+DROP TABLE IF EXISTS skewtable;
+CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+INSERT INTO TABLE skewtable VALUES ("0", "val_0");
+
+DROP TABLE IF EXISTS nonskewtable;
+CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE;
+INSERT INTO TABLE nonskewtable VALUES ("1", "val_1");
+INSERT INTO TABLE nonskewtable VALUES ("2", "val_2");
+
+EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key;
+
+SELECT * FROM result;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/04febfd6/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
new file mode 100644
index 0000000..f8cde9b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out
@@ -0,0 +1,212 @@
+PREHOOK: query: DROP TABLE IF EXISTS skewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: CREATE TABLE skewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skewtable
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skewtable
+POSTHOOK: query: INSERT INTO TABLE skewtable VALUES ("0", "val_0")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skewtable
+POSTHOOK: Lineage: skewtable.key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: skewtable.value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: DROP TABLE IF EXISTS nonskewtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS nonskewtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: CREATE TABLE nonskewtable (key STRING, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nonskewtable
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("1", "val_1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@nonskewtable
+POSTHOOK: query: INSERT INTO TABLE nonskewtable VALUES ("2", "val_2")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@nonskewtable
+POSTHOOK: Lineage: nonskewtable.key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: nonskewtable.value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: EXPLAIN
+CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0
+  Stage-6
+  Stage-4 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-4
+  Stage-7 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-7
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+          TableScan
+            alias: b
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          handleSkewJoin: true
+          keys:
+            0 key (type: string)
+            1 key (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.result
+
+  Stage: Stage-5
+    Conditional Operator
+
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        1 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 reducesinkkey0 (type: string)
+                1 reducesinkkey0 (type: string)
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 reducesinkkey0 (type: string)
+                1 reducesinkkey0 (type: string)
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.result
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-7
+      Create Table Operator:
+        Create Table
+          columns: key string, value string
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.result
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@nonskewtable
+PREHOOK: Input: default@skewtable
+PREHOOK: Output: database:default
+PREHOOK: Output: default@result
+POSTHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@nonskewtable
+POSTHOOK: Input: default@skewtable
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@result
+PREHOOK: query: SELECT * FROM result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@result
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@result
+#### A masked pattern was here ####