You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/10/27 20:57:03 UTC
svn commit: r1634671 [34/46] - in /hive/branches/spark:
itests/src/test/resources/ ql/src/test/results/clientpositive/spark/
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,226 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- only of the tables of the join (the left table of the join) is skewed
+-- the skewed filter would still be applied to both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- only of the tables of the join (the left table of the join) is skewed
+-- the skewed filter would still be applied to both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- the order of the join should not matter, just confirming
+EXPLAIN
+SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- the order of the join should not matter, just confirming
+EXPLAIN
+SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 22 2 12
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 18 8 28
+8 18 8 28
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,135 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is
+-- skewed by one column. Ths join is performed on the first skewed column
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- One of the tables is skewed by 2 columns, and the other table is
+-- skewed by one column. Ths join is performed on the first skewed column
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,137 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key, val) ON ((2, 12), (8, 18)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key, val) ON ((3, 13), (8, 18)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values
+-- is common to both the tables. The join key is a subset of the skewed key set:
+-- it only contains the first skewed key for both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Both the join tables are skewed by 2 keys, and one of the skewed values
+-- is common to both the tables. The join key is a subset of the skewed key set:
+-- it only contains the first skewed key for both the tables
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key
+ORDER BY a.key, b.key, a.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,166 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T3
+POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t3
+PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 2 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 2 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ 2 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+2 12 2 22 2 12
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,164 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING)
+SKEWED BY (key) ON ((3), (8)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T3
+POSTHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t3
+PREHOOK: query: -- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 1 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- This test is for validating skewed join compile time optimization for more than
+-- 2 tables. The join key is the same, and so a 3-way join would be performed.
+-- 1 of the 3 tables are skewed on the join key
+-- adding a order by at the end to make the results deterministic
+
+EXPLAIN
+SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: c
+ Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ 2 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key
+ORDER BY a.key, b.key, c.key, a.val, b.val, c.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+2 12 2 22 2 12
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,316 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
+SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T2
+POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a union all
+-- adding a order by at the end to make the results deterministic
+EXPLAIN
+select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a union all
+-- adding a order by at the end to make the results deterministic
+EXPLAIN
+select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Union 4 (GROUP PARTITION-LEVEL SORT, 1)
+ Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: string)
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Select Operator
+ expressions: key (type: string), val (type: string)
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 4
+ Vertex: Union 4
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.val, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from
+(
+select key, val from T1
+ union all
+select key, val from T1
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.val, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 12 2 22
+2 12 2 22
+3 13 3 13
+3 13 3 13
+8 18 8 18
+8 18 8 18
+8 18 8 18
+8 18 8 18
+8 28 8 18
+8 28 8 18
+8 28 8 18
+8 28 8 18
+PREHOOK: query: -- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a group by
+EXPLAIN
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the
+-- join sources is a sub-query consisting of a group by
+EXPLAIN
+select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP, 1)
+ Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1), Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: val (type: string)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.cnt, b.val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from
+(
+select key, count(1) as cnt from T1 group by key
+) subq1
+join T2 b on subq1.key = b.key
+ORDER BY subq1.key, b.key, subq1.cnt, b.val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 1 2 22
+3 1 3 13
+8 2 8 18
+8 2 8 18
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out?rev=1634671&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out Mon Oct 27 19:56:58 2014
@@ -0,0 +1,510 @@
+PREHOOK: query: create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive_test_smb_bucket1
+POSTHOOK: query: create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive_test_smb_bucket1
+PREHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive_test_smb_bucket2
+POSTHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive_test_smb_bucket2
+PREHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_TABREF
+ TOK_TABNAME
+ hive_test_smb_bucket1
+ a
+ TOK_TABREF
+ TOK_TABNAME
+ hive_test_smb_bucket2
+ b
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_HINTLIST
+ TOK_HINT
+ TOK_MAPJOIN
+ TOK_HINTARGLIST
+ b
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ k1
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ value
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ k2
+ TOK_WHERE
+ and
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ ds
+ '2010-10-15'
+ =
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ '2010-10-15'
+ TOK_FUNCTION
+ TOK_ISNOTNULL
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map 3
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col6, _col7
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_test_smb_bucket1
+PREHOOK: Input: default@hive_test_smb_bucket2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_test_smb_bucket1
+POSTHOOK: Input: default@hive_test_smb_bucket2
+#### A masked pattern was here ####
+PREHOOK: query: explain extended
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_TABREF
+ TOK_TABNAME
+ hive_test_smb_bucket1
+ a
+ TOK_TABREF
+ TOK_TABNAME
+ hive_test_smb_bucket2
+ b
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_HINTLIST
+ TOK_HINT
+ TOK_MAPJOIN
+ TOK_HINTARGLIST
+ a
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+ k1
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ value
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ a
+ key
+ k2
+ TOK_WHERE
+ and
+ and
+ =
+ .
+ TOK_TABLE_OR_COL
+ a
+ ds
+ '2010-10-15'
+ =
+ .
+ TOK_TABLE_OR_COL
+ b
+ ds
+ '2010-10-15'
+ TOK_FUNCTION
+ TOK_ISNOTNULL
+ .
+ TOK_TABLE_OR_COL
+ b
+ key
+
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map 3
+ Reducer 2
+ Needs Tagging: true
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0}
+ outputColumnNames: _col0, _col6, _col7
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:int
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_test_smb_bucket1
+PREHOOK: Input: default@hive_test_smb_bucket2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_test_smb_bucket1
+POSTHOOK: Input: default@hive_test_smb_bucket2
+#### A masked pattern was here ####
+PREHOOK: query: insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hive_test_smb_bucket1@ds=2010-10-15
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hive_test_smb_bucket1@ds=2010-10-15
+POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table hive_test_smb_bucket2 partition (ds='2010-10-15') select key, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hive_test_smb_bucket2@ds=2010-10-15
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: insert overwrite table hive_test_smb_bucket2 partition (ds='2010-10-15') select key, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hive_test_smb_bucket2@ds=2010-10-15
+POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+create table smb_mapjoin9_results as
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain
+create table smb_mapjoin9_results as
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: CREATETABLE_AS_SELECT
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-2, Stage-0
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE
+ value expressions: value (type: string), ds (type: string)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: key (type: int)
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE
+ Reducer 2
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0}
+ 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+ outputColumnNames: _col0, _col6, _col7, _col8
+ Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: string), _col8 (type: string), _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.smb_mapjoin9_results
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-4
+ Create Table Operator:
+ Create Table
+ columns: k1 int, value string, ds string, k2 int
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+ serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.smb_mapjoin9_results
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table smb_mapjoin9_results as
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@hive_test_smb_bucket1
+PREHOOK: Input: default@hive_test_smb_bucket1@ds=2010-10-15
+PREHOOK: Input: default@hive_test_smb_bucket2
+PREHOOK: Input: default@hive_test_smb_bucket2@ds=2010-10-15
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_mapjoin9_results
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: create table smb_mapjoin9_results as
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@hive_test_smb_bucket1
+POSTHOOK: Input: default@hive_test_smb_bucket1@ds=2010-10-15
+POSTHOOK: Input: default@hive_test_smb_bucket2
+POSTHOOK: Input: default@hive_test_smb_bucket2@ds=2010-10-15
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_mapjoin9_results
+PREHOOK: query: drop table smb_mapjoin9_results
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@smb_mapjoin9_results
+PREHOOK: Output: default@smb_mapjoin9_results
+POSTHOOK: query: drop table smb_mapjoin9_results
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@smb_mapjoin9_results
+POSTHOOK: Output: default@smb_mapjoin9_results
+PREHOOK: query: drop table hive_test_smb_bucket1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hive_test_smb_bucket1
+PREHOOK: Output: default@hive_test_smb_bucket1
+POSTHOOK: query: drop table hive_test_smb_bucket1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hive_test_smb_bucket1
+POSTHOOK: Output: default@hive_test_smb_bucket1
+PREHOOK: query: drop table hive_test_smb_bucket2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hive_test_smb_bucket2
+PREHOOK: Output: default@hive_test_smb_bucket2
+POSTHOOK: query: drop table hive_test_smb_bucket2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hive_test_smb_bucket2
+POSTHOOK: Output: default@hive_test_smb_bucket2