Posted to commits@hive.apache.org by kr...@apache.org on 2023/04/17 06:10:12 UTC
[hive] branch master updated: HIVE-27069: Incorrect results with bucket map join (Dayakar M, reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f6732333fa9 HIVE-27069: Incorrect results with bucket map join (Dayakar M, reviewed by Krisztian Kasa)
f6732333fa9 is described below
commit f6732333fa9dae07b7b09eaf76241ea69f2492db
Author: Dayakar M <59...@users.noreply.github.com>
AuthorDate: Mon Apr 17 11:39:59 2023 +0530
HIVE-27069: Incorrect results with bucket map join (Dayakar M, reviewed by Krisztian Kasa)
---
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/optimizer/ReduceSinkMapJoinProc.java | 3 +-
.../clientpositive/bucketmapjoin_with_subquery.q | 52 +++
.../tez/bucketmapjoin_with_subquery.q.out | 355 +++++++++++++++++++++
4 files changed, 410 insertions(+), 1 deletion(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 46c824bccac..b85c50bc098 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -17,6 +17,7 @@ minitez.query.files.shared=\
# specific to tez and cannot be added to minillap.
minitez.query.files=\
acid_vectorization_original_tez.q,\
+ bucketmapjoin_with_subquery.q,\
delete_orig_table.q,\
explainanalyze_1.q,\
explainanalyze_3.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
index bd6c41819d5..ea192e8af9a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
@@ -277,7 +277,8 @@ public class ReduceSinkMapJoinProc implements SemanticNodeProcessor {
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
}
}
- if (edgeType == EdgeType.CUSTOM_EDGE) {
+ if (edgeType == EdgeType.CUSTOM_EDGE || (edgeType == EdgeType.CUSTOM_SIMPLE_EDGE && !mapJoinOp.getConf()
+ .isDynamicPartitionHashJoin())) {
// disable auto parallelism for bucket map joins
parentRS.getConf().setReducerTraits(EnumSet.of(FIXED));
}
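
[Editor's note] For context when reading the hunk above: the one-line fix widens the condition under which the parent ReduceSink's reducer traits are pinned to FIXED, so Tez auto reducer parallelism cannot change the parent's task count out from under a bucket map join. The following is a minimal, self-contained sketch of just that decision; the enum and method names are simplified stand-ins, not Hive's actual ReduceSinkMapJoinProc API, and only the boolean logic mirrors the commit.

    import java.util.EnumSet;

    public class ReducerTraitSketch {

      enum EdgeType { SIMPLE_EDGE, BROADCAST_EDGE, CUSTOM_EDGE, CUSTOM_SIMPLE_EDGE }

      enum ReducerTraits { UNSET, FIXED, AUTOPARALLEL }

      // Before HIVE-27069, only CUSTOM_EDGE (the classic bucket map join edge)
      // pinned the parent ReduceSink to a FIXED reducer count. The fix also pins
      // CUSTOM_SIMPLE_EDGE when the map join is *not* a dynamic partition hash
      // join, i.e. it "disable[s] auto parallelism for bucket map joins" in that
      // case as well.
      static EnumSet<ReducerTraits> traitsFor(EdgeType edgeType,
          boolean dynamicPartitionHashJoin) {
        if (edgeType == EdgeType.CUSTOM_EDGE
            || (edgeType == EdgeType.CUSTOM_SIMPLE_EDGE && !dynamicPartitionHashJoin)) {
          return EnumSet.of(ReducerTraits.FIXED);
        }
        return EnumSet.of(ReducerTraits.AUTOPARALLEL);
      }

      public static void main(String[] args) {
        // The shape seen in the attached q.out, where Map 1 feeds the map join
        // over a CUSTOM_SIMPLE_EDGE and the join is not a dynamic partition
        // hash join: parallelism must stay FIXED.
        System.out.println(traitsFor(EdgeType.CUSTOM_SIMPLE_EDGE, false)); // [FIXED]
        System.out.println(traitsFor(EdgeType.CUSTOM_SIMPLE_EDGE, true));  // [AUTOPARALLEL]
      }
    }

The new test below reproduces the wrong-results scenario: a bucketed, transactional target table joined to a windowed subquery, which produces exactly such a CUSTOM_SIMPLE_EDGE into the map join (see the "Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)" edge in the q.out).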
diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin_with_subquery.q b/ql/src/test/queries/clientpositive/bucketmapjoin_with_subquery.q
new file mode 100644
index 00000000000..dd7274014c8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/bucketmapjoin_with_subquery.q
@@ -0,0 +1,52 @@
+-- HIVE-27069
+set hive.query.results.cache.enabled=false;
+set hive.compute.query.using.stats=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.auto.convert.sortmerge.join.to.mapjoin=true;
+set hive.convert.join.bucket.mapjoin.tez=true;
+set hive.fetch.task.conversion=none;
+set hive.merge.nway.joins=false;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.optimize.index.filter=true;
+set hive.optimize.remove.sq_count_check=true;
+set hive.prewarm.enabled=false;
+set hive.join.inner.residual=false;
+set hive.limit.optimize.enable=true;
+set hive.mapjoin.bucket.cache.size=10000;
+set hive.strict.managed.tables=true;
+set hive.tez.auto.reducer.parallelism=true;
+set hive.tez.bucket.pruning=true;
+
+CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default');
+
+CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default');
+
+INSERT INTO DUP_TEST
+(id , in_date , sample)
+values
+(1 , '2023-04-14 10:11:12.111' , 'test1'),
+(2 , '2023-04-14 10:11:12.111' , 'test2'),
+(3 , '2023-04-14 10:11:12.111' , 'test3'),
+(4 , '2023-04-14 10:11:12.111' , 'test4'),
+(5 , '2023-04-14 10:11:12.111' , 'test5'),
+(6 , '2023-04-14 10:11:12.111' , 'test6'),
+(7 , '2023-04-14 10:11:12.111' , 'test7'),
+(8 , '2023-04-14 10:11:12.111' , 'test8'),
+(9 , '2023-04-14 10:11:12.111' , 'test9');
+
+-- Run merge into the target table for the first time
+MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+ WHEN MATCHED THEN UPDATE SET in_date = S.in_date , sample = S.sample
+ WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample);
+
+explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;
+
+select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id;
+
+select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/tez/bucketmapjoin_with_subquery.q.out b/ql/src/test/results/clientpositive/tez/bucketmapjoin_with_subquery.q.out
new file mode 100644
index 00000000000..7ce5bb945e8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/bucketmapjoin_with_subquery.q.out
@@ -0,0 +1,355 @@
+PREHOOK: query: CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@DUP_TEST
+POSTHOOK: query: CREATE TABLE DUP_TEST (id int , in_date timestamp , sample varchar(100)) stored as orc tblproperties('transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@DUP_TEST
+PREHOOK: query: CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@DUP_TEST_TARGET
+POSTHOOK: query: CREATE TABLE DUP_TEST_TARGET (id int , in_date timestamp , sample varchar(100)) CLUSTERED by (ID) INTO 5 BUCKETS STORED AS ORC tblproperties('transactional'='true', 'transactional_properties'='default')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@DUP_TEST_TARGET
+PREHOOK: query: INSERT INTO DUP_TEST
+(id , in_date , sample)
+values
+(1 , '2023-04-14 10:11:12.111' , 'test1'),
+(2 , '2023-04-14 10:11:12.111' , 'test2'),
+(3 , '2023-04-14 10:11:12.111' , 'test3'),
+(4 , '2023-04-14 10:11:12.111' , 'test4'),
+(5 , '2023-04-14 10:11:12.111' , 'test5'),
+(6 , '2023-04-14 10:11:12.111' , 'test6'),
+(7 , '2023-04-14 10:11:12.111' , 'test7'),
+(8 , '2023-04-14 10:11:12.111' , 'test8'),
+(9 , '2023-04-14 10:11:12.111' , 'test9')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@dup_test
+POSTHOOK: query: INSERT INTO DUP_TEST
+(id , in_date , sample)
+values
+(1 , '2023-04-14 10:11:12.111' , 'test1'),
+(2 , '2023-04-14 10:11:12.111' , 'test2'),
+(3 , '2023-04-14 10:11:12.111' , 'test3'),
+(4 , '2023-04-14 10:11:12.111' , 'test4'),
+(5 , '2023-04-14 10:11:12.111' , 'test5'),
+(6 , '2023-04-14 10:11:12.111' , 'test6'),
+(7 , '2023-04-14 10:11:12.111' , 'test7'),
+(8 , '2023-04-14 10:11:12.111' , 'test8'),
+(9 , '2023-04-14 10:11:12.111' , 'test9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@dup_test
+POSTHOOK: Lineage: dup_test.id SCRIPT []
+POSTHOOK: Lineage: dup_test.in_date SCRIPT []
+POSTHOOK: Lineage: dup_test.sample SCRIPT []
+PREHOOK: query: MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+ WHEN MATCHED THEN UPDATE SET in_date = S.in_date , sample = S.sample
+ WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dup_test
+PREHOOK: Input: default@dup_test_target
+PREHOOK: Output: default@dup_test_target
+PREHOOK: Output: default@dup_test_target
+PREHOOK: Output: default@merge_tmp_table
+POSTHOOK: query: MERGE INTO DUP_TEST_TARGET T USING (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+ WHEN MATCHED THEN UPDATE SET in_date = S.in_date , sample = S.sample
+ WHEN NOT MATCHED THEN INSERT VALUES (S.id, S.in_date , S.sample)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dup_test
+POSTHOOK: Input: default@dup_test_target
+POSTHOOK: Output: default@dup_test_target
+POSTHOOK: Output: default@dup_test_target
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Lineage: dup_test_target.id SIMPLE [(dup_test)dup_test.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: dup_test_target.id SIMPLE [(dup_test)dup_test.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: dup_test_target.in_date SIMPLE [(dup_test)dup_test.FieldSchema(name:in_date, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: dup_test_target.in_date SIMPLE [(dup_test)dup_test.FieldSchema(name:in_date, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: dup_test_target.sample SIMPLE [(dup_test)dup_test.FieldSchema(name:sample, type:varchar(100), comment:null), ]
+POSTHOOK: Lineage: dup_test_target.sample SIMPLE [(dup_test)dup_test.FieldSchema(name:sample, type:varchar(100), comment:null), ]
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(dup_test_target)dup_test_target.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+PREHOOK: query: explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dup_test
+PREHOOK: Input: default@dup_test_target
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain vectorization detail select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dup_test
+POSTHOOK: Input: default@dup_test_target
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ filterExpr: id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 2052 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:id:int, 1:in_date:timestamp, 2:sample:varchar(100), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 2052 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: id (type: int), in_date (type: timestamp), sample (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 9 Data size: 2052 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: 0:int
+ valueColumns: 1:timestamp, 2:varchar(100)
+ Statistics: Num rows: 9 Data size: 2052 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: timestamp), _col2 (type: varchar(100))
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: id:int, in_date:timestamp, sample:varchar(100)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: dup_test
+ filterExpr: id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 1197 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:id:int, 1:in_date:timestamp, 2:sample:varchar(100), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
+ predicate: id is not null (type: boolean)
+ Statistics: Num rows: 9 Data size: 1197 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: id (type: int), in_date (type: timestamp)
+ null sort order: aa
+ sort order: +-
+ Map-reduce partition columns: id (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:int, 1:timestamp
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: 0:int
+ valueColumns: 2:varchar(100)
+ Statistics: Num rows: 9 Data size: 1197 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: sample (type: varchar(100))
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: id:int, in_date:timestamp, sample:varchar(100)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: +-
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:timestamp, VALUE._col0:varchar(100)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, timestamp, string]
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: timestamp), VALUE._col0 (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 9 Data size: 1197 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: timestamp, _col2: varchar(100)
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 DESC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: ROW_NUMBER_window_0
+ name: ROW_NUMBER
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ PTF Vectorization:
+ allEvaluatorsAreStreaming: true
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRowNumber]
+ functionInputExpressions: [null]
+ functionNames: [ROW_NUMBER]
+ keyInputColumns: [0, 1]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 1:timestamp]
+ outputColumns: [3, 0, 1, 2]
+ outputTypes: [int, int, timestamp, varchar(100)]
+ partitionExpressions: [col 0:int]
+ streamingColumns: [3]
+ Statistics: Num rows: 9 Data size: 1197 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 3:int, val 1)
+ predicate: (ROW_NUMBER_window_0 = 1) (type: boolean)
+ Statistics: Num rows: 4 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: timestamp), _col2 (type: varchar(100))
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 4 Data size: 532 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:int
+ bigTableRetainColumnNums: [0, 1, 2]
+ bigTableValueColumns: 0:int, 1:timestamp, 2:varchar(100)
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nonOuterSmallTableKeyMapping: []
+ projectedOutput: 0:int, 4:timestamp, 5:varchar(100), 0:int, 1:timestamp, 2:varchar(100)
+ smallTableValueMapping: 4:timestamp, 5:varchar(100)
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 9 Data size: 2257 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 9 Data size: 2257 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dup_test
+PREHOOK: Input: default@dup_test_target
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from DUP_TEST_TARGET T join (SELECT id , in_date , sample FROM (SELECT id , in_date , sample ,ROW_NUMBER()
+OVER(PARTITION BY id ORDER BY in_date DESC ) AS ROW_NUMB FROM DUP_TEST) OUTQUERY WHERE ROW_NUMB =1) as S ON T.id = S.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dup_test
+POSTHOOK: Input: default@dup_test_target
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2 2023-04-14 10:11:12.111 test2 2 2023-04-14 10:11:12.111 test2
+3 2023-04-14 10:11:12.111 test3 3 2023-04-14 10:11:12.111 test3
+6 2023-04-14 10:11:12.111 test6 6 2023-04-14 10:11:12.111 test6
+7 2023-04-14 10:11:12.111 test7 7 2023-04-14 10:11:12.111 test7
+1 2023-04-14 10:11:12.111 test1 1 2023-04-14 10:11:12.111 test1
+4 2023-04-14 10:11:12.111 test4 4 2023-04-14 10:11:12.111 test4
+5 2023-04-14 10:11:12.111 test5 5 2023-04-14 10:11:12.111 test5
+8 2023-04-14 10:11:12.111 test8 8 2023-04-14 10:11:12.111 test8
+9 2023-04-14 10:11:12.111 test9 9 2023-04-14 10:11:12.111 test9
+PREHOOK: query: select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dup_test
+PREHOOK: Input: default@dup_test_target
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from DUP_TEST_TARGET T join DUP_TEST S ON T.id = S.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dup_test
+POSTHOOK: Input: default@dup_test_target
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 2023-04-14 10:11:12.111 test1 1 2023-04-14 10:11:12.111 test1
+8 2023-04-14 10:11:12.111 test8 8 2023-04-14 10:11:12.111 test8
+4 2023-04-14 10:11:12.111 test4 4 2023-04-14 10:11:12.111 test4
+5 2023-04-14 10:11:12.111 test5 5 2023-04-14 10:11:12.111 test5
+3 2023-04-14 10:11:12.111 test3 3 2023-04-14 10:11:12.111 test3
+7 2023-04-14 10:11:12.111 test7 7 2023-04-14 10:11:12.111 test7
+9 2023-04-14 10:11:12.111 test9 9 2023-04-14 10:11:12.111 test9
+2 2023-04-14 10:11:12.111 test2 2 2023-04-14 10:11:12.111 test2
+6 2023-04-14 10:11:12.111 test6 6 2023-04-14 10:11:12.111 test6