You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/05/19 07:13:37 UTC
[hive] branch master updated: HIVE-27267: Incorrect results when doing bucket map join on decimal bucketed column with subquery (Seonggon Namgung, reviewed by Sourabh Badhya, Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 300b23846fe HIVE-27267: Incorrect results when doing bucket map join on decimal bucketed column with subquery (Seonggon Namgung, reviewed by Sourabh Badhya, Krisztian Kasa)
300b23846fe is described below
commit 300b23846feafbdef4c0b38c31000614ceedb95a
Author: seonggon <ln...@postech.ac.kr>
AuthorDate: Fri May 19 16:13:24 2023 +0900
HIVE-27267: Incorrect results when doing bucket map join on decimal bucketed column with subquery (Seonggon Namgung, reviewed by Sourabh Badhya, Krisztian Kasa)
---
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 5 +-
.../queries/clientpositive/bucket_map_join_tez3.q | 129 ++
.../clientpositive/llap/bucket_map_join_tez3.q.out | 1996 ++++++++++++++++++++
3 files changed, 2128 insertions(+), 2 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index f81ee62477b..3bf81b17bab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -659,15 +659,16 @@ public class ConvertJoinMapJoin implements SemanticNodeProcessor {
// Prepare updated partition columns for small table(s).
// Get the positions of bucketed columns
- int i = 0;
+ int bigTableExprPos = 0;
Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
// It is guaranteed there is only 1 list within listBucketCols.
for (String colName : listBucketCols.get(0)) {
if (colExprMap.get(colName).isSame(bigTableExpr)) {
- positions.add(i++);
+ positions.add(bigTableExprPos);
}
}
+ bigTableExprPos = bigTableExprPos + 1;
}
}
diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_tez3.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez3.q
new file mode 100644
index 00000000000..bd32a69085d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez3.q
@@ -0,0 +1,129 @@
+-- Test for HIVE-27267
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table target_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
+insert into table target_table values
+('2017-05-17', 'pipeline', '50000000000000000000441610525'),
+('2018-12-20', 'pipeline', '50000000000000000001048981030'),
+('2020-06-30', 'pipeline', '50000000000000000002332575516'),
+('2021-08-16', 'pipeline', '50000000000000000003897973989'),
+('2017-06-06', 'pipeline', '50000000000000000000449148729'),
+('2017-09-08', 'pipeline', '50000000000000000000525378314'),
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2018-05-03', 'pipeline', '50000000000000000000750826355'),
+('2020-01-10', 'pipeline', '50000000000000000001816579677'),
+('2021-11-01', 'pipeline', '50000000000000000004269423714'),
+('2017-11-07', 'pipeline', '50000000000000000000585901787'),
+('2019-10-15', 'pipeline', '50000000000000000001598843430'),
+('2020-04-01', 'pipeline', '50000000000000000002035795461'),
+('2020-02-24', 'pipeline', '50000000000000000001932600185'),
+('2020-04-27', 'pipeline', '50000000000000000002108160849'),
+('2016-07-05', 'pipeline', '50000000000000000000054405114'),
+('2020-06-02', 'pipeline', '50000000000000000002234387967'),
+('2020-08-21', 'pipeline', '50000000000000000002529168758'),
+('2021-02-17', 'pipeline', '50000000000000000003158511687');
+
+create table source_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
+insert into table source_table values
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2022-09-01', 'pipeline', '50000000000000000006008686831'),
+('2022-08-30', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992621067'),
+('2022-08-30', 'pipeline', '50000000000000000005992621067');
+
+
+-- Test 2 queries in 4 configs.
+
+-- Each query has 1 join that can be converted to bucket join.
+-- One of the query receives the small table from Map vertex while the other recives it from Reducer vertex.
+
+-- 4 configs enfoce MapJoin to be converted to one of the following joins:
+-- 1. BucketMapJoin, 2. MapJoin, 3. VectorBucketMapJoin, 4. VectorMapJoin
+
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=false;
+
+-- 1. BucketMapJoin
+set hive.convert.join.bucket.mapjoin.tez=true;
+set hive.vectorized.execution.enabled=false;
+
+explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+-- 2. MapJoin
+set hive.convert.join.bucket.mapjoin.tez=false;
+set hive.vectorized.execution.enabled=false;
+
+explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+-- 3. VectorBucketMapJoin
+set hive.convert.join.bucket.mapjoin.tez=true;
+set hive.vectorized.execution.enabled=true;
+
+explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+-- 4. VectorMapJoin
+set hive.convert.join.bucket.mapjoin.tez=false;
+set hive.vectorized.execution.enabled=true;
+
+explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+
+explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col;
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out
new file mode 100644
index 00000000000..98a6023cedb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out
@@ -0,0 +1,1996 @@
+PREHOOK: query: create table target_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@target_table
+POSTHOOK: query: create table target_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@target_table
+PREHOOK: query: insert into table target_table values
+('2017-05-17', 'pipeline', '50000000000000000000441610525'),
+('2018-12-20', 'pipeline', '50000000000000000001048981030'),
+('2020-06-30', 'pipeline', '50000000000000000002332575516'),
+('2021-08-16', 'pipeline', '50000000000000000003897973989'),
+('2017-06-06', 'pipeline', '50000000000000000000449148729'),
+('2017-09-08', 'pipeline', '50000000000000000000525378314'),
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2018-05-03', 'pipeline', '50000000000000000000750826355'),
+('2020-01-10', 'pipeline', '50000000000000000001816579677'),
+('2021-11-01', 'pipeline', '50000000000000000004269423714'),
+('2017-11-07', 'pipeline', '50000000000000000000585901787'),
+('2019-10-15', 'pipeline', '50000000000000000001598843430'),
+('2020-04-01', 'pipeline', '50000000000000000002035795461'),
+('2020-02-24', 'pipeline', '50000000000000000001932600185'),
+('2020-04-27', 'pipeline', '50000000000000000002108160849'),
+('2016-07-05', 'pipeline', '50000000000000000000054405114'),
+('2020-06-02', 'pipeline', '50000000000000000002234387967'),
+('2020-08-21', 'pipeline', '50000000000000000002529168758'),
+('2021-02-17', 'pipeline', '50000000000000000003158511687')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@target_table
+POSTHOOK: query: insert into table target_table values
+('2017-05-17', 'pipeline', '50000000000000000000441610525'),
+('2018-12-20', 'pipeline', '50000000000000000001048981030'),
+('2020-06-30', 'pipeline', '50000000000000000002332575516'),
+('2021-08-16', 'pipeline', '50000000000000000003897973989'),
+('2017-06-06', 'pipeline', '50000000000000000000449148729'),
+('2017-09-08', 'pipeline', '50000000000000000000525378314'),
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2018-05-03', 'pipeline', '50000000000000000000750826355'),
+('2020-01-10', 'pipeline', '50000000000000000001816579677'),
+('2021-11-01', 'pipeline', '50000000000000000004269423714'),
+('2017-11-07', 'pipeline', '50000000000000000000585901787'),
+('2019-10-15', 'pipeline', '50000000000000000001598843430'),
+('2020-04-01', 'pipeline', '50000000000000000002035795461'),
+('2020-02-24', 'pipeline', '50000000000000000001932600185'),
+('2020-04-27', 'pipeline', '50000000000000000002108160849'),
+('2016-07-05', 'pipeline', '50000000000000000000054405114'),
+('2020-06-02', 'pipeline', '50000000000000000002234387967'),
+('2020-08-21', 'pipeline', '50000000000000000002529168758'),
+('2021-02-17', 'pipeline', '50000000000000000003158511687')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@target_table
+POSTHOOK: Lineage: target_table.date_col SCRIPT []
+POSTHOOK: Lineage: target_table.decimal_col SCRIPT []
+POSTHOOK: Lineage: target_table.string_col SCRIPT []
+PREHOOK: query: create table source_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source_table
+POSTHOOK: query: create table source_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source_table
+PREHOOK: query: insert into table source_table values
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2022-09-01', 'pipeline', '50000000000000000006008686831'),
+('2022-08-30', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992621067'),
+('2022-08-30', 'pipeline', '50000000000000000005992621067')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source_table
+POSTHOOK: query: insert into table source_table values
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2022-09-01', 'pipeline', '50000000000000000006008686831'),
+('2022-08-30', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992621067'),
+('2022-08-30', 'pipeline', '50000000000000000005992621067')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source_table
+POSTHOOK: Lineage: source_table.date_col SCRIPT []
+POSTHOOK: Lineage: source_table.decimal_col SCRIPT []
+POSTHOOK: Lineage: source_table.string_col SCRIPT []
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t2`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t2`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t2` ON `t0`.`date_col` = `t2`.`date_col` AND `t0`.`decimal_col` = `t2`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 2 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE
+ BucketMapJoin: true
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(38,0))
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: false
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t3`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t3`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)
+GROUP BY `date_col`, `decimal_col`) AS `t3` ON `t0`.`date_col` = `t3`.`date_col` AND `t0`.`decimal_col` = `t3`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 3 (CUSTOM_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE
+ BucketMapJoin: true
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: date_col (type: date), decimal_col (type: decimal(38,0))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t2`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t2`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t2` ON `t0`.`date_col` = `t2`.`date_col` AND `t0`.`decimal_col` = `t2`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 2 => 3
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t3`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t3`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)
+GROUP BY `date_col`, `decimal_col`) AS `t3` ON `t0`.`date_col` = `t3`.`date_col` AND `t0`.`decimal_col` = `t3`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 3 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: date_col (type: date), decimal_col (type: decimal(38,0))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t2`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t2`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t2` ON `t0`.`date_col` = `t2`.`date_col` AND `t0`.`decimal_col` = `t2`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 2 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE
+ BucketMapJoin: true
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(38,0))
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: false
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t3`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t3`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)
+GROUP BY `date_col`, `decimal_col`) AS `t3` ON `t0`.`date_col` = `t3`.`date_col` AND `t0`.`decimal_col` = `t3`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 3 (CUSTOM_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE
+ BucketMapJoin: true
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: date_col (type: date), decimal_col (type: decimal(38,0))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t2`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t2`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t2` ON `t0`.`date_col` = `t2`.`date_col` AND `t0`.`decimal_col` = `t2`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Map 2 => 3
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Position of Big Table: 0
+ Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593
+PREHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `t0`.`date_col`, CAST('pipeline' AS STRING) AS `string_col`, `t0`.`decimal_col`, `t3`.`date_col` AS `date_col1`, 'pipeline' AS `string_col1`, `t3`.`decimal_col` AS `decimal_col1`
+FROM (SELECT `date_col`, `decimal_col`
+FROM `default`.`target_table`
+WHERE `string_col` = 'pipeline' AND CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)) AS `t0`
+INNER JOIN (SELECT `date_col`, `decimal_col`
+FROM `default`.`source_table`
+WHERE CASE WHEN `decimal_col` IS NOT NULL THEN CAST(`decimal_col` AS STRING) = '50000000000000000005905545593' ELSE FALSE END AND (`date_col` IS NOT NULL AND `decimal_col` IS NOT NULL)
+GROUP BY `date_col`, `decimal_col`) AS `t3` ON `t0`.`date_col` = `t3`.`date_col` AND `t0`.`decimal_col` = `t3`.`decimal_col`
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 3 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: target_table
+ filterExpr: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: ((string_col = 'pipeline') and if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_col (type: date), decimal_col (type: decimal(38,0))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 1680 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Estimated key counts: Reducer 3 => 1
+ keys:
+ 0 _col0 (type: date), _col1 (type: decimal(38,0))
+ 1 _col0 (type: date), _col1 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Reducer 3
+ Position of Big Table: 0
+ Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2,_col3,_col4,_col5
+ columns.types date:string:decimal(38,0):date:string:decimal(38,0)
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: target_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.target_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.target_table
+ name: default.target_table
+ Truncated Path -> Alias:
+ /target_table [target_table]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: source_table
+ filterExpr: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (if(decimal_col is not null, (CAST( decimal_col AS STRING) = '50000000000000000005905545593'), false) and date_col is not null and decimal_col is not null) (type: boolean)
+ Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: date_col (type: date), decimal_col (type: decimal(38,0))
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: source_table
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ properties:
+ bucket_count 7
+ bucket_field_name decimal_col
+ bucketing_version 2
+ column.name.delimiter ,
+ columns date_col,string_col,decimal_col
+ columns.comments
+ columns.types date:string:decimal(38,0)
+#### A masked pattern was here ####
+ name default.source_table
+ serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ transactional true
+ transactional_properties default
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.source_table
+ name: default.source_table
+ Truncated Path -> Alias:
+ /source_table [source_table]
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0))
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: date), _col1 (type: decimal(38,0))
+ null sort order: zz
+ numBuckets: -1
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0))
+ Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: 1
+ auto parallelism: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source_table
+PREHOOK: Input: default@target_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from target_table inner join
+(select distinct date_col, 'pipeline' string_col, decimal_col from source_table where coalesce(decimal_col,'') = '50000000000000000005905545593') s
+on s.date_col = target_table.date_col AND s.string_col = target_table.string_col AND s.decimal_col = target_table.decimal_col
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source_table
+POSTHOOK: Input: default@target_table
+#### A masked pattern was here ####
+2022-08-16 pipeline 50000000000000000005905545593 2022-08-16 pipeline 50000000000000000005905545593
+2022-08-30 pipeline 50000000000000000005905545593 2022-08-30 pipeline 50000000000000000005905545593