Posted to commits@hive.apache.org by sp...@apache.org on 2016/11/29 16:03:52 UTC
[1/2] hive git commit: HIVE-15114: Remove extra MoveTask operators from the ConditionalTask (Sergio Pena, reviewed by Sahil Takiar and Aihua Xu)
Repository: hive
Updated Branches:
refs/heads/master 0d49b3684 -> e00b1a339
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
new file mode 100644
index 0000000..c725c96c
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out
@@ -0,0 +1,655 @@
+PREHOOK: query: -- Insert unpartitioned table;
+DROP TABLE table1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Insert unpartitioned table;
+DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/table1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/table1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+1
+PREHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT OVERWRITE TABLE table1 VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+2
+PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 VALUES (1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 VALUES (1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-2 depends on stages: Stage-0, Stage-4, Stage-6
+ Stage-3
+ Stage-0 depends on stages: Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__3
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: Values__Tmp__Table__3
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.values__tmp__table__3
+ serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.values__tmp__table__3
+ serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.values__tmp__table__3
+ name: default.values__tmp__table__3
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ tables:
+ replace: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+ Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-6
+ Move Operator
+ tables:
+ replace: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 1
+ numRows 1
+ rawDataSize 1
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 2
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: -- Insert dynamic partitions;
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/table1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Insert dynamic partitions;
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/table1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@table1@key=101
+POSTHOOK: Output: default@table1@key=202
+POSTHOOK: Output: default@table1@key=303
+POSTHOOK: Output: default@table1@key=404
+POSTHOOK: Output: default@table1@key=505
+POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1@key=101
+PREHOOK: Input: default@table1@key=202
+PREHOOK: Input: default@table1@key=303
+PREHOOK: Input: default@table1@key=404
+PREHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1@key=101
+POSTHOOK: Input: default@table1@key=202
+POSTHOOK: Input: default@table1@key=303
+POSTHOOK: Input: default@table1@key=404
+POSTHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+1 101
+2 202
+3 303
+4 404
+5 505
+PREHOOK: query: INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@table1@key=101
+POSTHOOK: Output: default@table1@key=202
+POSTHOOK: Output: default@table1@key=303
+POSTHOOK: Output: default@table1@key=404
+POSTHOOK: Output: default@table1@key=505
+POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1@key=101
+PREHOOK: Input: default@table1@key=202
+PREHOOK: Input: default@table1@key=303
+PREHOOK: Input: default@table1@key=404
+PREHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1@key=101
+POSTHOOK: Input: default@table1@key=202
+POSTHOOK: Input: default@table1@key=303
+POSTHOOK: Input: default@table1@key=404
+POSTHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+1 101
+2 202
+3 303
+4 404
+5 505
+PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__6
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), '_bucket_number' (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: int)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: Values__Tmp__Table__6
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1,tmp_values_col2
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.values__tmp__table__6
+ serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1,tmp_values_col2
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.values__tmp__table__6
+ serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.values__tmp__table__6
+ name: default.values__tmp__table__6
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY._col1 (type: string), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, '_bucket_number'
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name id
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ partition_columns key
+ partition_columns.types string
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ key
+ replace: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name id
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ partition_columns key
+ partition_columns.types string
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+ Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 88d5afa..adc1188 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -73,8 +73,10 @@ import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
import org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat;
+import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner;
@@ -94,6 +96,7 @@ import org.apache.hadoop.hive.ql.plan.FileMergeDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
@@ -1431,11 +1434,140 @@ public final class GenMapRedUtils {
parentTask.addDependentTask(mvTask);
}
} else {
- parentTask.addDependentTask(mvTask);
+ if (BlobStorageUtils.areOptimizationsEnabled(hconf) && parentTask instanceof MoveTask && areMoveTasksOnSameBlobStorage(hconf, (Task<MoveWork>)parentTask, mvTask)) {
+ mergeMoveTasks((Task<MoveWork>)parentTask, mvTask);
+ } else {
+ parentTask.addDependentTask(mvTask);
+ }
}
}
}
+ /**
+ * Checks whether the moveTask1 source path is on the same filesystem as the moveTask2
+ * destination path.
+ *
+ * @param hconf Configuration object.
+ * @param moveTask1 First MoveTask, whose source path is compared.
+ * @param moveTask2 Second MoveTask, whose destination path is compared.
+ * @return True if the source and destination are on the same filesystem; false otherwise.
+ */
+ private static boolean areMoveTasksOnSameBlobStorage(HiveConf hconf, Task<MoveWork> moveTask1, Task<MoveWork> moveTask2) {
+ Path sourcePath1, targetPath2;
+
+ MoveWork moveWork1 = moveTask1.getWork();
+ MoveWork moveWork2 = moveTask2.getWork();
+
+ // Do not merge the tasks if both file and table work are present. A MoveWork should not
+ // be configured this way, but the API allows it.
+ if (moveWork1.getLoadFileWork() != null && moveWork1.getLoadTableWork() != null) { return false; }
+ if (moveWork2.getLoadFileWork() != null && moveWork2.getLoadTableWork() != null) { return false; }
+
+ if (moveWork1.getLoadFileWork() != null) {
+ sourcePath1 = moveWork1.getLoadFileWork().getSourcePath();
+ } else if (moveWork1.getLoadTableWork() != null) {
+ sourcePath1 = moveWork1.getLoadTableWork().getSourcePath();
+ } else {
+ // Multi-file loads are not supported by this optimization
+ return false;
+ }
+
+ if (moveWork2.getLoadFileWork() != null) {
+ targetPath2 = moveWork2.getLoadFileWork().getTargetDir();
+ } else if (moveWork2.getLoadTableWork() != null) {
+ targetPath2 = getTableLocationPath(hconf, moveWork2.getLoadTableWork().getTable());
+ } else {
+ // Multi-file loads are not supported by this optimization
+ return false;
+ }
+
+ if (sourcePath1 != null && targetPath2 != null && BlobStorageUtils.isBlobStoragePath(hconf, sourcePath1)) {
+ return sourcePath1.toUri().getScheme().equals(targetPath2.toUri().getScheme());
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * Returns the table location path from a TableDesc object.
+ *
+ * @param hconf Configuration object.
+ * @param tableDesc Table descriptor from which the table name is taken.
+ * @return The path where the table is located, or null if the table cannot be fetched.
+ */
+ private static Path getTableLocationPath(final HiveConf hconf, final TableDesc tableDesc) {
+ Table table = null;
+ try {
+ Hive hive = Hive.get(hconf);
+ table = hive.getTable(tableDesc.getTableName());
+ } catch (HiveException e) {
+ LOG.warn("Unable to get the table location path for: " + tableDesc.getTableName(), e);
+ }
+
+ return (table != null) ? table.getPath() : null;
+ }
+
+ /**
+ * Merges moveTask2 into moveTask1 so that the resulting MoveTask uses the moveTask1 source
+ * and the moveTask2 destination as its new source/destination paths. This method is useful
+ * when two MoveTask operators are found chained together in the execution plan.
+ *
+ * @param moveTask1 First MoveTask, whose source path is kept.
+ * @param moveTask2 Second MoveTask, whose destination path is adopted.
+ */
+ private static void mergeMoveTasks(Task<MoveWork> moveTask1, Task<MoveWork> moveTask2) {
+ Path sourcePath1;
+ LoadTableDesc loadTableDesc = null;
+ LoadFileDesc loadFileDesc = null;
+
+ MoveWork moveWork1 = moveTask1.getWork();
+ MoveWork moveWork2 = moveTask2.getWork();
+
+ // Do not merge the tasks if both file and table work are present. A MoveWork should not
+ // be configured this way, but the API allows it.
+ if (moveWork1.getLoadFileWork() != null && moveWork1.getLoadTableWork() != null) { return; }
+ if (moveWork2.getLoadFileWork() != null && moveWork2.getLoadTableWork() != null) { return; }
+
+ if (moveWork1.getLoadFileWork() != null) {
+ sourcePath1 = moveTask1.getWork().getLoadFileWork().getSourcePath();
+ } else if (moveWork1.getLoadTableWork() != null) {
+ sourcePath1 = moveTask1.getWork().getLoadTableWork().getSourcePath();
+ } else {
+ // Multi-file loads are not supported by this optimization
+ return;
+ }
+
+ if (moveTask2.getWork().getLoadFileWork() != null) {
+ loadFileDesc = new LoadFileDesc(
+ sourcePath1,
+ moveWork2.getLoadFileWork().getTargetDir(),
+ moveWork2.getLoadFileWork().getIsDfsDir(),
+ moveWork2.getLoadFileWork().getColumns(),
+ moveWork2.getLoadFileWork().getColumnTypes()
+ );
+ } else if (moveTask2.getWork().getLoadTableWork() != null) {
+ loadTableDesc = new LoadTableDesc(
+ sourcePath1,
+ moveWork2.getLoadTableWork().getTable(),
+ moveWork2.getLoadTableWork().getPartitionSpec(),
+ moveWork2.getLoadTableWork().getReplace(),
+ moveWork2.getLoadTableWork().getWriteType()
+ );
+ } else {
+ // Multi-file loads are not supported by this optimization
+ return;
+ }
+
+ moveWork1.setLoadTableWork(loadTableDesc);
+ moveWork1.setLoadFileWork(loadFileDesc);
+ moveWork1.setCheckFileFormat(moveWork2.getCheckFileFormat());
+
+ // Re-link moveTask2's dependent tasks to moveTask1 so downstream stages still run
+ if (moveTask2.getDependentTasks() != null) {
+ for (Task dependentTask : moveTask2.getDependentTasks()) {
+ moveTask1.addDependentTask(dependentTask);
+ }
+ }
+ }
/**
* Add the StatsTask as a dependent task of the MoveTask
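
Editor's note: a minimal standalone sketch (not part of this commit, and not the Hive API) of
the rule the new GenMapRedUtils code implements: two chained moves A -> B and B -> C collapse
into a single move A -> C when A is on a blob store and A and C share the same filesystem
scheme. The Move class and the scheme allow-list below are illustrative assumptions only.

import java.net.URI;
import java.util.Arrays;
import java.util.List;

public class MoveMergeSketch {
  // Hypothetical stand-in for Hive's MoveWork: just a source and a target.
  static class Move {
    final URI source;
    final URI target;
    Move(URI source, URI target) { this.source = source; this.target = target; }
    public String toString() { return source + " -> " + target; }
  }

  // Illustrative blob-store schemes; Hive reads the real list from configuration.
  static final List<String> BLOB_SCHEMES = Arrays.asList("s3", "s3a", "s3n");

  // Mirrors areMoveTasksOnSameBlobStorage(): the first source must be on a
  // blob store, and its scheme must match the second target's scheme.
  static boolean canMerge(Move first, Move second) {
    String sourceScheme = first.source.getScheme();
    String targetScheme = second.target.getScheme();
    return sourceScheme != null && sourceScheme.equals(targetScheme)
        && BLOB_SCHEMES.contains(sourceScheme);
  }

  // Mirrors mergeMoveTasks(): keep the first source, adopt the second target.
  static Move merge(Move first, Move second) {
    return new Move(first.source, second.target);
  }

  public static void main(String[] args) {
    Move stagingToFinal = new Move(
        URI.create("s3a://bucket/scratch/-ext-10002"),
        URI.create("s3a://bucket/scratch/-ext-10000"));
    Move finalToTable = new Move(
        URI.create("s3a://bucket/scratch/-ext-10000"),
        URI.create("s3a://bucket/warehouse/table1"));

    if (canMerge(stagingToFinal, finalToTable)) {
      // One copy instead of two; on S3 every "move" is a full copy of the data.
      System.out.println(merge(stagingToFinal, finalToTable));
    }
  }
}
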
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
new file mode 100644
index 0000000..e6ec445
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.MoveWork;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TestGenMapRedUtilsCreateConditionalTask {
+ private static HiveConf hiveConf;
+
+ private Task dummyMRTask;
+
+ @BeforeClass
+ public static void initializeSessionState() {
+ hiveConf = new HiveConf();
+ }
+
+ @Before
+ public void setUp() {
+ dummyMRTask = new MapRedTask();
+ }
+
+ @Test
+ public void testConditionalMoveTaskIsOptimized() throws SemanticException {
+ hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");
+
+ Path sinkDirName = new Path("s3a://bucket/scratch/-ext-10002");
+ FileSinkOperator fileSinkOperator = createFileSinkOperator(sinkDirName);
+
+ Path finalDirName = new Path("s3a://bucket/scratch/-ext-10000");
+ Path tableLocation = new Path("s3a://bucket/warehouse/table");
+ Task<MoveWork> moveTask = createMoveTask(finalDirName, tableLocation);
+ List<Task<MoveWork>> moveTaskList = Arrays.asList(moveTask);
+
+ GenMapRedUtils.createMRWorkForMergingFiles(fileSinkOperator, finalDirName, null, moveTaskList, hiveConf, dummyMRTask);
+ ConditionalTask conditionalTask = (ConditionalTask)dummyMRTask.getChildTasks().get(0);
+ Task<? extends Serializable> moveOnlyTask = conditionalTask.getListTasks().get(0);
+ Task<? extends Serializable> mergeOnlyTask = conditionalTask.getListTasks().get(1);
+ Task<? extends Serializable> mergeAndMoveTask = conditionalTask.getListTasks().get(2);
+
+ /*
+ * OPTIMIZATION
+ * The ConditionalTask avoids linking two MoveTasks, which are expensive on blob storage
+ * systems. Instead of linking them, it creates a single MoveTask whose source is the first
+ * MoveTask's source and whose target is the second MoveTask's target.
+ */
+
+ // Verify moveOnlyTask is optimized
+ assertNull(moveOnlyTask.getChildTasks());
+ verifyMoveTask(moveOnlyTask, sinkDirName, tableLocation);
+
+ // Verify mergeOnlyTask is NOT optimized (a merge task writes directly to finalDirName, then a MoveTask is executed)
+ assertEquals(1, mergeOnlyTask.getChildTasks().size());
+ verifyMoveTask(mergeOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);
+
+ // Verify mergeAndMoveTask is optimized
+ assertEquals(1, mergeAndMoveTask.getChildTasks().size());
+ assertNull(mergeAndMoveTask.getChildTasks().get(0).getChildTasks());
+ verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0), sinkDirName, tableLocation);
+ }
+
+ @Test
+ public void testConditionalMoveTaskIsNotOptimized() throws SemanticException {
+ hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "false");
+
+ Path sinkDirName = new Path("s3a://bucket/scratch/-ext-10002");
+ FileSinkOperator fileSinkOperator = createFileSinkOperator(sinkDirName);
+
+ Path finalDirName = new Path("s3a://bucket/scratch/-ext-10000");
+ Path tableLocation = new Path("s3a://bucket/warehouse/table");
+ Task<MoveWork> moveTask = createMoveTask(finalDirName, tableLocation);
+ List<Task<MoveWork>> moveTaskList = Arrays.asList(moveTask);
+
+ GenMapRedUtils.createMRWorkForMergingFiles(fileSinkOperator, finalDirName, null, moveTaskList, hiveConf, dummyMRTask);
+ ConditionalTask conditionalTask = (ConditionalTask)dummyMRTask.getChildTasks().get(0);
+ Task<? extends Serializable> moveOnlyTask = conditionalTask.getListTasks().get(0);
+ Task<? extends Serializable> mergeOnlyTask = conditionalTask.getListTasks().get(1);
+ Task<? extends Serializable> mergeAndMoveTask = conditionalTask.getListTasks().get(2);
+
+ // Verify moveOnlyTask is NOT optimized
+ assertEquals(1, moveOnlyTask.getChildTasks().size());
+ verifyMoveTask(moveOnlyTask, sinkDirName, finalDirName);
+ verifyMoveTask(moveOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);
+
+ // Verify mergeOnlyTask is NOT optimized
+ assertEquals(1, mergeOnlyTask.getChildTasks().size());
+ verifyMoveTask(mergeOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);
+
+ // Verify mergeAndMoveTask is NOT optimized
+ assertEquals(1, mergeAndMoveTask.getChildTasks().size());
+ assertEquals(1, mergeAndMoveTask.getChildTasks().get(0).getChildTasks().size());
+ verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0), sinkDirName, finalDirName);
+ verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0).getChildTasks().get(0), finalDirName, tableLocation);
+ }
+
+ @Test
+ public void testConditionalMoveOnHdfsIsNotOptimized() throws SemanticException {
+ hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");
+
+ Path sinkDirName = new Path("hdfs://bucket/scratch/-ext-10002");
+ FileSinkOperator fileSinkOperator = createFileSinkOperator(sinkDirName);
+
+ Path finalDirName = new Path("hdfs://bucket/scratch/-ext-10000");
+ Path tableLocation = new Path("hdfs://bucket/warehouse/table");
+ Task<MoveWork> moveTask = createMoveTask(finalDirName, tableLocation);
+ List<Task<MoveWork>> moveTaskList = Arrays.asList(moveTask);
+
+ GenMapRedUtils.createMRWorkForMergingFiles(fileSinkOperator, finalDirName, null, moveTaskList, hiveConf, dummyMRTask);
+ ConditionalTask conditionalTask = (ConditionalTask)dummyMRTask.getChildTasks().get(0);
+ Task<? extends Serializable> moveOnlyTask = conditionalTask.getListTasks().get(0);
+ Task<? extends Serializable> mergeOnlyTask = conditionalTask.getListTasks().get(1);
+ Task<? extends Serializable> mergeAndMoveTask = conditionalTask.getListTasks().get(2);
+
+ // Verify moveOnlyTask is NOT optimized
+ assertEquals(1, moveOnlyTask.getChildTasks().size());
+ verifyMoveTask(moveOnlyTask, sinkDirName, finalDirName);
+ verifyMoveTask(moveOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);
+
+ // Verify mergeOnlyTask is NOT optimized
+ assertEquals(1, mergeOnlyTask.getChildTasks().size());
+ verifyMoveTask(mergeOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);
+
+ // Verify mergeAndMoveTask is NOT optimized
+ assertEquals(1, mergeAndMoveTask.getChildTasks().size());
+ assertEquals(1, mergeAndMoveTask.getChildTasks().get(0).getChildTasks().size());
+ verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0), sinkDirName, finalDirName);
+ verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0).getChildTasks().get(0), finalDirName, tableLocation);
+ }
+
+ private FileSinkOperator createFileSinkOperator(Path finalDirName) {
+ FileSinkOperator fileSinkOperator = mock(FileSinkOperator.class);
+
+ TableDesc tableDesc = new TableDesc(HiveInputFormat.class, HiveOutputFormat.class, new Properties());
+ FileSinkDesc fileSinkDesc = new FileSinkDesc(finalDirName, tableDesc, false);
+ fileSinkDesc.setDirName(finalDirName);
+
+ when(fileSinkOperator.getConf()).thenReturn(fileSinkDesc);
+ when(fileSinkOperator.getSchema()).thenReturn(mock(RowSchema.class));
+ fileSinkDesc.setTableInfo(tableDesc);
+
+ when(fileSinkOperator.getCompilationOpContext()).thenReturn(mock(CompilationOpContext.class));
+
+ return fileSinkOperator;
+ }
+
+ private Task<MoveWork> createMoveTask(Path source, Path destination) {
+ Task<MoveWork> moveTask = mock(MoveTask.class);
+ MoveWork moveWork = new MoveWork();
+ moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null));
+
+ when(moveTask.getWork()).thenReturn(moveWork);
+
+ return moveTask;
+ }
+
+ private void verifyMoveTask(Task<? extends Serializable> task, Path source, Path target) {
+ MoveTask moveTask = (MoveTask)task;
+ assertEquals(source, moveTask.getWork().getLoadFileWork().getSourcePath());
+ assertEquals(target, moveTask.getWork().getLoadFileWork().getTargetDir());
+ }
+}
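
Editor's note: for readers following the tests above, here is a minimal sketch (not Hive
code; all class and method names are hypothetical) of the dependent-task relinking that
mergeMoveTasks() performs. When the second MoveTask is folded into the first, its children
(for example a stats stage) must be re-attached to the surviving task so downstream stages
still run.

import java.util.ArrayList;
import java.util.List;

public class RelinkSketch {
  // Hypothetical stand-in for a Hive Task node in the plan graph.
  static class TaskNode {
    final String name;
    final List<TaskNode> children = new ArrayList<TaskNode>();
    TaskNode(String name) { this.name = name; }
    void addDependentTask(TaskNode child) { children.add(child); }
  }

  // Fold task2 into task1: task1 inherits task2's dependent tasks.
  static void inheritDependents(TaskNode task1, TaskNode task2) {
    for (TaskNode child : task2.children) {
      task1.addDependentTask(child);
    }
  }

  public static void main(String[] args) {
    TaskNode move1 = new TaskNode("MoveTask(staging -> table)");
    TaskNode move2 = new TaskNode("MoveTask(final -> table)");
    TaskNode stats = new TaskNode("StatsTask");
    move2.addDependentTask(stats);

    inheritDependents(move1, move2); // move2 drops out of the plan
    System.out.println(move1.name + " now depends on: " + move1.children.get(0).name);
  }
}
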
[2/2] hive git commit: HIVE-15114: Remove extra MoveTask operators from the ConditionalTask (Sergio Pena, reviewed by Sahil Takiar and Aihua Xu)
Posted by sp...@apache.org.
HIVE-15114: Remove extra MoveTask operators from the ConditionalTask (Sergio Pena, reviewed by Sahil Takiar and Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e00b1a33
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e00b1a33
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e00b1a33
Branch: refs/heads/master
Commit: e00b1a3392bb4474f89ca8eea4f253e185596299
Parents: 0d49b36
Author: Sergio Pena <se...@cloudera.com>
Authored: Tue Nov 29 10:03:13 2016 -0600
Committer: Sergio Pena <se...@cloudera.com>
Committed: Tue Nov 29 10:03:13 2016 -0600
----------------------------------------------------------------------
.../test/queries/clientpositive/insert_into.q | 8 -
.../queries/clientpositive/insert_into_table.q | 20 +
.../clientpositive/insert_overwrite_directory.q | 27 +
.../clientpositive/insert_overwrite_table.q | 22 +
.../results/clientpositive/insert_into.q.out | 343 ----------
.../clientpositive/insert_into_table.q.out | 599 +++++++++++++++++
.../insert_overwrite_directory.q.out | 653 ++++++++++++++++++
.../clientpositive/insert_overwrite_table.q.out | 655 +++++++++++++++++++
.../hive/ql/optimizer/GenMapRedUtils.java | 134 +++-
...TestGenMapRedUtilsCreateConditionalTask.java | 199 ++++++
10 files changed, 2308 insertions(+), 352 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
deleted file mode 100644
index c9ed57d..0000000
--- a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into.q
+++ /dev/null
@@ -1,8 +0,0 @@
-set hive.blobstore.use.blobstore.as.scratchdir=true;
-
-DROP TABLE qtest;
-CREATE TABLE qtest (value int) LOCATION '${hiveconf:test.blobstore.path.unique}/qtest/';
-INSERT INTO qtest VALUES (1), (10), (100), (1000);
-INSERT INTO qtest VALUES (2), (20), (200), (2000);
-EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000);
-SELECT * FROM qtest;
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
new file mode 100644
index 0000000..25e2e70
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_into_table.q
@@ -0,0 +1,20 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- Insert unpartitioned table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int) LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT INTO TABLE table1 VALUES (1);
+INSERT INTO TABLE table1 VALUES (2);
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1);
+DROP TABLE table1;
+
+-- Insert dynamic partitions;
+CREATE TABLE table1 (id int) partitioned by (key string) clustered by (id) into 2 buckets LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+DROP TABLE table1;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
new file mode 100644
index 0000000..f1b5a0b
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_directory.q
@@ -0,0 +1,27 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+
+-- Create a simple source table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+INSERT INTO TABLE table1 VALUES (1, 'k1');
+INSERT INTO TABLE table1 VALUES (2, 'k2');
+
+-- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT * FROM table1;
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table1.dir/000000_0;
+
+-- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table2.dir/' SELECT key;
+
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table1.dir/000000_0;
+dfs -cat ${hiveconf:test.blobstore.path.unique}/table2.dir/000000_0;
+
+-- Verify the plan is optimized;
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT * FROM table1;
+
+EXPLAIN EXTENDED FROM table1
+ INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table1.dir/' SELECT id
+ INSERT OVERWRITE DIRECTORY '${hiveconf:test.blobstore.path.unique}/table2.dir/' SELECT key;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
new file mode 100644
index 0000000..846b2b1
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_table.q
@@ -0,0 +1,22 @@
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.blobstore.use.blobstore.as.scratchdir=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- Insert unpartitioned table;
+DROP TABLE table1;
+CREATE TABLE table1 (id int) LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT OVERWRITE TABLE table1 VALUES (1);
+SELECT * FROM table1;
+INSERT OVERWRITE TABLE table1 VALUES (2);
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 VALUES (1);
+DROP TABLE table1;
+
+-- Insert dynamic partitions;
+CREATE TABLE table1 (id int) partitioned by (key string) clustered by (id) into 2 buckets LOCATION '${hiveconf:test.blobstore.path.unique}/table1/';
+INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+SELECT * FROM table1;
+INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+SELECT * FROM table1;
+EXPLAIN EXTENDED INSERT OVERWRITE TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');
+DROP TABLE table1;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out
deleted file mode 100644
index 00ad136..0000000
--- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into.q.out
+++ /dev/null
@@ -1,343 +0,0 @@
-PREHOOK: query: DROP TABLE qtest
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE qtest
-POSTHOOK: type: DROPTABLE
-#### A masked pattern was here ####
-PREHOOK: type: CREATETABLE
-PREHOOK: Input: ### test.blobstore.path ###/qtest
-PREHOOK: Output: database:default
-PREHOOK: Output: default@qtest
-#### A masked pattern was here ####
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Input: ### test.blobstore.path ###/qtest
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@qtest
-PREHOOK: query: INSERT INTO qtest VALUES (1), (10), (100), (1000)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__1
-PREHOOK: Output: default@qtest
-POSTHOOK: query: INSERT INTO qtest VALUES (1), (10), (100), (1000)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__1
-POSTHOOK: Output: default@qtest
-POSTHOOK: Lineage: qtest.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: INSERT INTO qtest VALUES (2), (20), (200), (2000)
-PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__2
-PREHOOK: Output: default@qtest
-POSTHOOK: query: INSERT INTO qtest VALUES (2), (20), (200), (2000)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__2
-POSTHOOK: Output: default@qtest
-POSTHOOK: Lineage: qtest.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000)
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO qtest VALUES (1), (10), (100), (1000)
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
- Stage-4
- Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
- Stage-2 depends on stages: Stage-0
- Stage-3
- Stage-5
- Stage-6 depends on stages: Stage-5
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: values__tmp__table__3
- Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- GatherStats: false
- Select Operator
- expressions: UDFToInteger(tmp_values_col1) (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
- directory: ### BLOBSTORE_STAGING_PATH ###
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
- Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: Values__Tmp__Table__3
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns tmp_values_col1
- columns.comments
- columns.types string
-#### A masked pattern was here ####
- name default.values__tmp__table__3
- serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns tmp_values_col1
- columns.comments
- columns.types string
-#### A masked pattern was here ####
- name default.values__tmp__table__3
- serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.values__tmp__table__3
- name: default.values__tmp__table__3
- Truncated Path -> Alias:
-#### A masked pattern was here ####
-
- Stage: Stage-7
- Conditional Operator
-
- Stage: Stage-4
- Move Operator
- files:
- hdfs directory: true
- source: ### BLOBSTORE_STAGING_PATH ###
- destination: ### BLOBSTORE_STAGING_PATH ###
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: false
- source: ### BLOBSTORE_STAGING_PATH ###
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
-
- Stage: Stage-2
- Stats-Aggr Operator
- Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
- directory: ### BLOBSTORE_STAGING_PATH ###
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
- ### BLOBSTORE_STAGING_PATH ###
- Path -> Partition:
- ### BLOBSTORE_STAGING_PATH ###
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
- name: default.qtest
- Truncated Path -> Alias:
- ### BLOBSTORE_STAGING_PATH ###
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- GatherStats: false
- File Output Operator
- compressed: false
- GlobalTableId: 0
- directory: ### BLOBSTORE_STAGING_PATH ###
- NumFilesPerFileSink: 1
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
- Path -> Alias:
- ### BLOBSTORE_STAGING_PATH ###
- Path -> Partition:
- ### BLOBSTORE_STAGING_PATH ###
- Partition
- base file name: -ext-10002
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns value
- columns.comments
- columns.types int
-#### A masked pattern was here ####
- location ### test.blobstore.path ###/qtest
- name default.qtest
- numFiles 2
- serialization.ddl struct qtest { i32 value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 28
-#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.qtest
- name: default.qtest
- Truncated Path -> Alias:
- ### BLOBSTORE_STAGING_PATH ###
-
- Stage: Stage-6
- Move Operator
- files:
- hdfs directory: true
- source: ### BLOBSTORE_STAGING_PATH ###
- destination: ### BLOBSTORE_STAGING_PATH ###
-
-PREHOOK: query: SELECT * FROM qtest
-PREHOOK: type: QUERY
-PREHOOK: Input: default@qtest
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM qtest
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@qtest
-#### A masked pattern was here ####
-1
-10
-100
-1000
-2
-20
-200
-2000
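For reference, a minimal HiveQL sketch of the pattern exercised by the qtest plan above. The table name and column are taken from the removed plan output; the blobstore URI is a placeholder, since the test masks the real path, and the stage semantics in the comments are only my reading of the plan:

    -- Placeholder blobstore location; the test's actual path is masked above.
    CREATE TABLE qtest (value INT)
    LOCATION 's3a://bucket/warehouse/qtest';

    -- An overwrite like this appears to drive the Conditional Operator
    -- (Stage-7 above), which selects between the direct Move (Stage-4)
    -- and the merge map-reduce jobs (Stage-3/Stage-5) before the final Move.
    INSERT OVERWRITE TABLE qtest VALUES (1);
    SELECT * FROM qtest;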
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
new file mode 100644
index 0000000..fbb52c1
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out
@@ -0,0 +1,599 @@
+PREHOOK: query: -- Insert unpartitioned table;
+DROP TABLE table1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Insert unpartitioned table;
+DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/table1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/table1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT INTO TABLE table1 VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT INTO TABLE table1 VALUES (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 VALUES (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 VALUES (1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-2 depends on stages: Stage-0, Stage-4, Stage-6
+ Stage-3
+ Stage-0 depends on stages: Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__3
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: Values__Tmp__Table__3
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.values__tmp__table__3
+ serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.values__tmp__table__3
+ serialization.ddl struct values__tmp__table__3 { string tmp_values_col1}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.values__tmp__table__3
+ name: default.values__tmp__table__3
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ tables:
+ replace: false
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+ Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-6
+ Move Operator
+ tables:
+ replace: false
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ numFiles 2
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 4
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: -- Insert dynamic partitions;
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/table1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Insert dynamic partitions;
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/table1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@table1@key=101
+POSTHOOK: Output: default@table1@key=202
+POSTHOOK: Output: default@table1@key=303
+POSTHOOK: Output: default@table1@key=404
+POSTHOOK: Output: default@table1@key=505
+POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@table1@key=101
+POSTHOOK: Output: default@table1@key=202
+POSTHOOK: Output: default@table1@key=303
+POSTHOOK: Output: default@table1@key=404
+POSTHOOK: Output: default@table1@key=505
+POSTHOOK: Lineage: table1 PARTITION(key=101).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=202).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=303).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=404).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1 PARTITION(key=505).id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1@key=101
+PREHOOK: Input: default@table1@key=202
+PREHOOK: Input: default@table1@key=303
+PREHOOK: Input: default@table1@key=404
+PREHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1@key=101
+POSTHOOK: Input: default@table1@key=202
+POSTHOOK: Input: default@table1@key=303
+POSTHOOK: Input: default@table1@key=404
+POSTHOOK: Input: default@table1@key=505
+#### A masked pattern was here ####
+1 101
+1 101
+2 202
+2 202
+3 303
+3 303
+4 404
+4 404
+5 505
+5 505
+PREHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED INSERT INTO TABLE table1 PARTITION (key) VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: values__tmp__table__6
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: UDFToInteger(tmp_values_col1) (type: int), tmp_values_col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), '_bucket_number' (type: string)
+ null sort order: aa
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: int)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: Values__Tmp__Table__6
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1,tmp_values_col2
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.values__tmp__table__6
+ serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns tmp_values_col1,tmp_values_col2
+ columns.comments
+ columns.types string:string
+#### A masked pattern was here ####
+ name default.values__tmp__table__6
+ serialization.ddl struct values__tmp__table__6 { string tmp_values_col1, string tmp_values_col2}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.values__tmp__table__6
+ name: default.values__tmp__table__6
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY._col1 (type: string), KEY.'_bucket_number' (type: string)
+ outputColumnNames: _col0, _col1, '_bucket_number'
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name id
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ partition_columns key
+ partition_columns.types string
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ key
+ replace: false
+ source: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name id
+ columns id
+ columns.comments
+ columns.types int
+#### A masked pattern was here ####
+ location ### test.blobstore.path ###/table1
+ name default.table1
+ partition_columns key
+ partition_columns.types string
+ serialization.ddl struct table1 { i32 id}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+ Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
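As an aside, a minimal sketch of DDL that would match the dynamic-partition plan above. The properties bucket_count 2, bucket_field_name id, and partition_columns key are read directly off the plan; the LOCATION URI and the nonstrict setting are assumptions:

    -- Assumed DDL matching the table properties shown in the plan;
    -- the LOCATION is a placeholder for the masked blobstore path.
    CREATE TABLE table1 (id INT)
    PARTITIONED BY (key STRING)
    CLUSTERED BY (id) INTO 2 BUCKETS
    LOCATION 's3a://bucket/warehouse/table1';

    -- Dynamic-partition inserts typically require nonstrict mode.
    SET hive.exec.dynamic.partition.mode=nonstrict;
    INSERT INTO TABLE table1 PARTITION (key)
    VALUES (1, '101'), (2, '202'), (3, '303'), (4, '404'), (5, '505');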
http://git-wip-us.apache.org/repos/asf/hive/blob/e00b1a33/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out
new file mode 100644
index 0000000..9f575a6
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out
@@ -0,0 +1,653 @@
+PREHOOK: query: -- Create a simple source table;
+DROP TABLE table1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- Create a simple source table;
+DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: CREATE TABLE table1 (id int, key string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: INSERT INTO TABLE table1 VALUES (1, 'k1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 VALUES (1, 'k1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: INSERT INTO TABLE table1 VALUES (2, 'k2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: INSERT INTO TABLE table1 VALUES (2, 'k2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: -- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: ### test.blobstore.path ###/table1.dir
+POSTHOOK: query: -- Write and verify data on the directory;
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: ### test.blobstore.path ###/table1.dir
+1k1
+2k2
+PREHOOK: query: -- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: ### test.blobstore.path ###/table1.dir
+PREHOOK: Output: ### test.blobstore.path ###/table2.dir
+POSTHOOK: query: -- Write and verify data using FROM ... INSERT OVERWRITE DIRECTORY;
+FROM table1
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: ### test.blobstore.path ###/table1.dir
+POSTHOOK: Output: ### test.blobstore.path ###/table2.dir
+1
+2
+k1
+k2
+PREHOOK: query: -- Verify plan is optimized
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Verify plan is optimized
+EXPLAIN EXTENDED INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT * FROM table1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-6 depends on stages: Stage-1 , consists of Stage-3, Stage-2, Stage-4
+ Stage-3
+ Stage-2
+ Stage-0 depends on stages: Stage-2
+ Stage-4
+ Stage-5 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: table1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: id (type: int), key (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: table1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id,key
+ columns.comments
+ columns.types int:string
+ field.delim ,
+#### A masked pattern was here ####
+ name default.table1
+ numFiles 2
+ numRows 2
+ rawDataSize 8
+ serialization.ddl struct table1 { i32 id, string key}
+ serialization.format ,
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 10
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id,key
+ columns.comments
+ columns.types int:string
+ field.delim ,
+#### A masked pattern was here ####
+ name default.table1
+ numFiles 2
+ numRows 2
+ rawDataSize 8
+ serialization.ddl struct table1 { i32 id, string key}
+ serialization.format ,
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 10
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ /table1 [table1]
+
+ Stage: Stage-6
+ Conditional Operator
+
+ Stage: Stage-3
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10002
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types int:string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+PREHOOK: query: EXPLAIN EXTENDED FROM table1
+ INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+ INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED FROM table1
+ INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table1.dir/' SELECT id
+ INSERT OVERWRITE DIRECTORY '### test.blobstore.path ###/table2.dir/' SELECT key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-7 depends on stages: Stage-2 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-3
+ Stage-0 depends on stages: Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+ Stage-12 depends on stages: Stage-2 , consists of Stage-9, Stage-8, Stage-10
+ Stage-9
+ Stage-8
+ Stage-1 depends on stages: Stage-8
+ Stage-10
+ Stage-11 depends on stages: Stage-10
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: table1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ GatherStats: false
+ Select Operator
+ expressions: id (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ###
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: table1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id,key
+ columns.comments
+ columns.types int:string
+ field.delim ,
+#### A masked pattern was here ####
+ name default.table1
+ numFiles 2
+ numRows 2
+ rawDataSize 8
+ serialization.ddl struct table1 { i32 id, string key}
+ serialization.format ,
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 10
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ bucket_count -1
+ columns id,key
+ columns.comments
+ columns.types int:string
+ field.delim ,
+#### A masked pattern was here ####
+ name default.table1
+ numFiles 2
+ numRows 2
+ rawDataSize 8
+ serialization.ddl struct table1 { i32 id, string key}
+ serialization.format ,
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 10
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.table1
+ name: default.table1
+ Truncated Path -> Alias:
+ /table1 [table1]
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10004
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10004
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types int
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table1.dir
+
+ Stage: Stage-12
+ Conditional Operator
+
+ Stage: Stage-9
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table2.dir
+
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10005
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-1
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table2.dir
+
+ Stage: Stage-10
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: ### BLOBSTORE_STAGING_PATH ###
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+ Path -> Partition:
+ ### BLOBSTORE_STAGING_PATH ###
+ Partition
+ base file name: -ext-10005
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0
+ columns.types string
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Truncated Path -> Alias:
+ ### BLOBSTORE_STAGING_PATH ###
+
+ Stage: Stage-11
+ Move Operator
+ files:
+ hdfs directory: true
+ source: ### BLOBSTORE_STAGING_PATH ###
+ destination: ### test.blobstore.path ###/table2.dir
+
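Finally, the multi-insert form tested above yields one conditional move/merge subtree per output directory (Stage-7 and Stage-12 in the plan). A minimal sketch, with placeholder URIs standing in for the masked test.blobstore.path:

    -- Each INSERT OVERWRITE DIRECTORY branch gets its own ConditionalTask
    -- in the plan: Stage-7 gates table1.dir, Stage-12 gates table2.dir.
    FROM table1
    INSERT OVERWRITE DIRECTORY 's3a://bucket/table1.dir/' SELECT id
    INSERT OVERWRITE DIRECTORY 's3a://bucket/table2.dir/' SELECT key;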