You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/08/21 02:08:36 UTC
svn commit: r1619267 [2/4] - in /hive/branches/spark:
itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/
ql/src/test/results/clientpositive/spark/
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_19.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,603 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name data_type comment
+
+key string
+values bigint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 2
+ numRows -1
+ rawDataSize -1
+ totalSize 40
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+7 1
+2 1
+8 2
+3 1
+1 1
+7 1
+2 1
+8 2
+3 1
+1 1
+PREHOOK: query: -- filter should be fine
+explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a where a.key = 7
+PREHOOK: type: QUERY
+POSTHOOK: query: -- filter should be fine
+explain
+insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a where a.key = 7
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 7) (type: boolean)
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '7' (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 30 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 7) (type: boolean)
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '7' (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a where a.key = 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a where a.key = 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+7 1
+7 1
+PREHOOK: query: -- filters and sub-queries should be fine
+explain
+insert overwrite table outputTbl1
+select key, values from
+(
+SELECT a.key + a.key as key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+) b where b.key >= 7
+PREHOOK: type: QUERY
+POSTHOOK: query: -- filters and sub-queries should be fine
+explain
+insert overwrite table outputTbl1
+select key, values from
+(
+SELECT a.key + a.key as key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+) b where b.key >= 7
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 >= 7.0) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: (_col0 + _col0) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 >= 7.0) (type: boolean)
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+select key, values from
+(
+SELECT a.key + a.key as key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+) b where b.key >= 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+select key, values from
+(
+SELECT a.key + a.key as key, a.values
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+) b where b.key >= 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+14.0 1
+14.0 1
+16.0 2
+16.0 2
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_2.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,256 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 3 subqueries is performed (exactly one of which requires a map-reduce job)
+-- followed by select star and a file sink.
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 3 <- Map 2 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), UDFToLong(2) (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string), UDFToLong(1) (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT *
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, 1 as values from inputTbl1
+ UNION ALL
+ SELECT key, 2 as values from inputTbl1
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name data_type comment
+
+key string
+values bigint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 3
+ numRows -1
+ rawDataSize -1
+ totalSize 68
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1 1
+1 1
+1 2
+2 1
+2 1
+2 2
+3 1
+3 1
+3 2
+7 1
+7 1
+7 2
+8 1
+8 1
+8 2
+8 2
+8 2
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_20.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,249 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, the order of the columns in the select list is different. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, the order of the columns in the select list is different. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(values bigint, key string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(values bigint, key string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: bigint), _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.values, a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name data_type comment
+
+values bigint
+key string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 2
+ numRows -1
+ rawDataSize -1
+ totalSize 40
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1 1
+1 1
+1 2
+1 2
+1 3
+1 3
+1 7
+1 7
+2 8
+2 8
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_21.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,247 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, all the columns are not selected. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select and a file sink
+-- However, all the columns are not selected. So, union cannot
+-- be removed.
+-- It does not matter, whether the output is merged or not. In this case, merging is turned
+-- off
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23. The union is removed, the select (which changes the order of
+-- columns being selected) is pushed above the union.
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: insert overwrite table outputTbl1
+SELECT a.key
+FROM (
+ SELECT key, count(1) as values from inputTbl1 group by key
+ UNION ALL
+ SELECT key, count(1) as values from inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name data_type comment
+
+key string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 2
+ numRows -1
+ rawDataSize -1
+ totalSize 20
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1
+1
+2
+2
+3
+3
+7
+7
+8
+8
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out?rev=1619267&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_24.q.out Thu Aug 21 00:08:35 2014
@@ -0,0 +1,245 @@
+PREHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- One sub-query has a double and the other sub-query has a bigint.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@inputTbl1
+POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization
+-- Union of 2 map-reduce subqueries is performed followed by select star and a file sink
+-- There is no need to write the temporary results of the sub-queries, and then read them
+-- again to process the union. The union can be removed completely.
+-- One sub-query has a double and the other sub-query has a bigint.
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+-- Since this test creates sub-directories for the output table outputTbl1, it might be easier
+-- to run the test only on hadoop 23
+
+create table inputTbl1(key string, val string) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@inputTbl1
+PREHOOK: query: create table outputTbl1(key double, values bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@outputTbl1
+POSTHOOK: query: create table outputTbl1(key double, values bigint) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@outputTbl1
+PREHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@inputtbl1
+POSTHOOK: query: load data local inpath '../../data/files/T1.txt' into table inputTbl1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@inputtbl1
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+ SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+ UNION ALL
+ SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+ SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+ UNION ALL
+ SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Spark
+ Edges:
+ Reducer 2 <- Map 1 (GROUP)
+ Reducer 4 <- Map 3 (GROUP)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: inputtbl1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ Select Operator
+ expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.outputtbl1
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+ SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+ UNION ALL
+ SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@inputtbl1
+PREHOOK: Output: default@outputtbl1
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
+SELECT * FROM
+(
+ SELECT CAST(key AS DOUBLE) AS key, count(1) as values FROM inputTbl1 group by key
+ UNION ALL
+ SELECT CAST(key AS BIGINT) AS key, count(1) as values FROM inputTbl1 group by key
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@inputtbl1
+POSTHOOK: Output: default@outputtbl1
+POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), (inputtbl1)inputtbl1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl1.values EXPRESSION [(inputtbl1)inputtbl1.null, (inputtbl1)inputtbl1.null, ]
+PREHOOK: query: desc formatted outputTbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@outputtbl1
+POSTHOOK: query: desc formatted outputTbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@outputtbl1
+# col_name data_type comment
+
+key double
+values bigint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE false
+ numFiles 2
+ numRows -1
+ rawDataSize -1
+ totalSize 60
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: select * from outputTbl1 order by key, values
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from outputTbl1 order by key, values
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl1
+#### A masked pattern was here ####
+1.0 1
+1.0 1
+2.0 1
+2.0 1
+3.0 1
+3.0 1
+7.0 1
+7.0 1
+8.0 2
+8.0 2