You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2021/08/10 09:16:04 UTC
[hive] branch master updated: HIVE-25410: CommonMergeJoin fails for
ARRAY join keys with varying sizes (okumin reviewed by Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9b5feec HIVE-25410: CommonMergeJoin fails for ARRAY join keys with varying sizes (okumin reviewed by Stamatis Zampetakis)
9b5feec is described below
commit 9b5feec4dc475d6ee38c6d11130fb19d351a718d
Author: okumin <gi...@okumin.com>
AuthorDate: Sat Jul 31 01:31:11 2021 +0900
HIVE-25410: CommonMergeJoin fails for ARRAY join keys with varying sizes (okumin reviewed by Stamatis Zampetakis)
CommonMergeJoinOperator fails with ArrayIndexOutOfBoundsException when
the column contains ARRAYs of different sizes. Before this change the
comparators were created only once, for the first comparison, and were
reused afterwards, causing an exception when subsequent ARRAY values
contained more elements.
STRUCTs are not affected since the number of elements is consistent
across records.
Closes #2551
---
.../hadoop/hive/ql/exec/HiveStructComparator.java | 19 ++-
.../hive/ql/exec/WritableComparatorFactory.java | 3 +-
.../clientpositive/smb_mapjoin_complex_type.q | 4 +-
.../clientpositive/test_join_complex_type.q | 8 +-
.../llap/test_join_complex_type.q.out | 186 +++++++++++++++------
.../clientpositive/smb_mapjoin_complex_type.q.out | 19 ++-
6 files changed, 162 insertions(+), 77 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java
index 50d02cc..d8abf72 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java
@@ -19,12 +19,15 @@ package org.apache.hadoop.hive.ql.exec;
import org.apache.hadoop.hive.ql.util.NullOrdering;
-import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
+import java.util.ArrayList;
import java.util.List;
+/**
+ * A WritableComparator to compare STRUCT or ARRAY objects.
+ */
final class HiveStructComparator extends HiveWritableComparator {
- private WritableComparator[] comparator = null;
+ private final List<WritableComparator> comparators = new ArrayList<>();
HiveStructComparator(boolean nullSafe, NullOrdering nullOrdering) {
super(nullSafe, nullOrdering);
@@ -45,16 +48,14 @@ final class HiveStructComparator extends HiveWritableComparator {
if (a1.size() == 0) {
return 0;
}
- if (comparator == null) {
- comparator = new WritableComparator[a1.size()];
- // For struct all elements may not be of same type, so create comparator for each entry.
- for (int i = 0; i < a1.size(); i++) {
- comparator[i] = WritableComparatorFactory.get(a1.get(i), nullSafe, nullOrdering);
- }
+ // For array, the length may not be fixed, so extend comparators on demand
+ for (int i = comparators.size(); i < a1.size(); i++) {
+ // For struct, all elements may not be of same type, so create comparator for each entry.
+ comparators.add(i, WritableComparatorFactory.get(a1.get(i), nullSafe, nullOrdering));
}
result = 0;
for (int i = 0; i < a1.size(); i++) {
- result = comparator[i].compare(a1.get(i), a2.get(i));
+ result = comparators.get(i).compare(a1.get(i), a2.get(i));
if (result != 0) {
return result;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java
index 17ae06d..ff9ada4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.exec;
import org.apache.hadoop.hive.ql.util.NullOrdering;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
-import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import java.util.List;
import java.util.Map;
@@ -27,7 +26,7 @@ import java.util.Map;
public final class WritableComparatorFactory {
public static WritableComparator get(Object key, boolean nullSafe, NullOrdering nullOrdering) {
if (key instanceof List) {
- // For array type struct is used as we do not know if all elements of array are of same type.
+ // STRUCT or ARRAY are expressed as java.util.List
return new HiveStructComparator(nullSafe, nullOrdering);
} else if (key instanceof Map) {
// TODO : https://issues.apache.org/jira/browse/HIVE-25042
diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q b/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q
index 002746c..4842008 100644
--- a/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q
+++ b/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q
@@ -10,10 +10,10 @@ set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
CREATE TABLE test_list1 (key INT, value array<int>, col_1 STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS;
-INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car');
+INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory');
CREATE TABLE test_list2 (key INT, value array<int>, col_2 STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS;
-INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli');
+INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor');
EXPLAIN
SELECT *
diff --git a/ql/src/test/queries/clientpositive/test_join_complex_type.q b/ql/src/test/queries/clientpositive/test_join_complex_type.q
index 7b72a5a..af0cd7c 100644
--- a/ql/src/test/queries/clientpositive/test_join_complex_type.q
+++ b/ql/src/test/queries/clientpositive/test_join_complex_type.q
@@ -4,11 +4,17 @@ insert into table_list_types VALUES (2, array(1,2), array(2,2));
insert into table_list_types VALUES (3, array(1,3), array(2,3));
insert into table_list_types VALUES (4, array(1,4), array(1,4));
insert into table_list_types VALUES (5, array(1,4), array(null,4));
+insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3));
+insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1));
+insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1));
create table table_list_types1 (id int, c1 array<int>, c2 array<int>);
insert into table_list_types1 VALUES (1, array(1,1), array(2,1));
insert into table_list_types1 VALUES (2, array(1,2), array(2,2));
insert into table_list_types1 VALUES (3, array(1,4), array(1,3));
+insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3));
+insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2));
+insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2));
set hive.cbo.enable=false;
set hive.auto.convert.join=false;
@@ -78,4 +84,4 @@ explain select * from table_struct_types t1 inner join table_struct_types1 t2 on
select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c1 = t2.c1;
explain select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2;
-select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2;
\ No newline at end of file
+select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2;
diff --git a/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out b/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out
index cbd7ae1..da9179b 100644
--- a/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out
+++ b/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out
@@ -61,6 +61,39 @@ POSTHOOK: Output: default@table_list_types
POSTHOOK: Lineage: table_list_types.c1 SCRIPT []
POSTHOOK: Lineage: table_list_types.c2 SCRIPT []
POSTHOOK: Lineage: table_list_types.id SCRIPT []
+PREHOOK: query: insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types
+POSTHOOK: query: insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types
+POSTHOOK: Lineage: table_list_types.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types.id SCRIPT []
+PREHOOK: query: insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types
+POSTHOOK: query: insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types
+POSTHOOK: Lineage: table_list_types.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types.id SCRIPT []
+PREHOOK: query: insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types
+POSTHOOK: query: insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types
+POSTHOOK: Lineage: table_list_types.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types.id SCRIPT []
PREHOOK: query: create table table_list_types1 (id int, c1 array<int>, c2 array<int>)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -102,6 +135,39 @@ POSTHOOK: Output: default@table_list_types1
POSTHOOK: Lineage: table_list_types1.c1 SCRIPT []
POSTHOOK: Lineage: table_list_types1.c2 SCRIPT []
POSTHOOK: Lineage: table_list_types1.id SCRIPT []
+PREHOOK: query: insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types1
+POSTHOOK: query: insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types1
+POSTHOOK: Lineage: table_list_types1.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.id SCRIPT []
+PREHOOK: query: insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types1
+POSTHOOK: query: insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types1
+POSTHOOK: Lineage: table_list_types1.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.id SCRIPT []
+PREHOOK: query: insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table_list_types1
+POSTHOOK: query: insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table_list_types1
+POSTHOOK: Lineage: table_list_types1.c1 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.c2 SCRIPT []
+POSTHOOK: Lineage: table_list_types1.id SCRIPT []
PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1
PREHOOK: type: QUERY
PREHOOK: Input: default@table_list_types
@@ -128,16 +194,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: c1 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: c1 (type: array<int>)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
value expressions: id (type: int), c2 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -145,16 +211,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: c1 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: c1 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: id (type: int), c2 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -168,14 +234,14 @@ STAGE PLANS:
0 c1 (type: array<int>)
1 c1 (type: array<int>)
outputColumnNames: _col0, _col1, _col2, _col7, _col8, _col9
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: array<int>), _col2 (type: array<int>), _col7 (type: int), _col8 (type: array<int>), _col9 (type: array<int>)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -198,7 +264,10 @@ POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
1 [1,1] [2,1] 1 [1,1] [2,1]
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
+8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2]
2 [1,2] [2,2] 2 [1,2] [2,2]
+7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2]
5 [1,4] [null,4] 3 [1,4] [1,3]
4 [1,4] [1,4] 3 [1,4] [1,3]
PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c2 = t2.c2
@@ -227,16 +296,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: c2 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: c2 (type: array<int>)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
value expressions: id (type: int), c1 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -244,16 +313,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: c2 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: c2 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: id (type: int), c1 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -267,14 +336,14 @@ STAGE PLANS:
0 c2 (type: array<int>)
1 c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2, _col7, _col8, _col9
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: array<int>), _col2 (type: array<int>), _col7 (type: int), _col8 (type: array<int>), _col9 (type: array<int>)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -296,6 +365,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
1 [1,1] [2,1] 1 [1,1] [2,1]
2 [1,2] [2,2] 2 [1,2] [2,2]
PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1
@@ -324,14 +394,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -341,10 +411,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -355,20 +425,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col2 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -389,6 +459,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
+8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2]
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
+7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2]
5 [1,4] [null,4] 3 [1,4] [1,3]
1 [1,1] [2,1] 1 [1,1] [2,1]
2 [1,2] [2,2] 2 [1,2] [2,2]
@@ -419,14 +492,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -436,10 +509,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -450,20 +523,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -484,6 +557,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
1 [1,1] [2,1] 1 [1,1] [2,1]
2 [1,2] [2,2] 2 [1,2] [2,2]
PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1
@@ -513,14 +587,14 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: c1 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -530,10 +604,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -545,20 +619,20 @@ STAGE PLANS:
TableScan
alias: t2
filterExpr: c1 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c1 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col2 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -579,6 +653,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
+8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2]
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
+7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2]
5 [1,4] [null,4] 3 [1,4] [1,3]
1 [1,1] [2,1] 1 [1,1] [2,1]
2 [1,2] [2,2] 2 [1,2] [2,2]
@@ -610,14 +687,14 @@ STAGE PLANS:
TableScan
alias: t1
filterExpr: c2 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -627,10 +704,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -642,20 +719,20 @@ STAGE PLANS:
TableScan
alias: t2
filterExpr: c2 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: c2 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: array<int>)
null sort order: z
sort order: +
Map-reduce partition columns: _col2 (type: array<int>)
- Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: array<int>)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -676,6 +753,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@table_list_types
POSTHOOK: Input: default@table_list_types1
#### A masked pattern was here ####
+6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3]
1 [1,1] [2,1] 1 [1,1] [2,1]
2 [1,2] [2,2] 2 [1,2] [2,2]
PREHOOK: query: create table table_struct_types (id int, c1 struct<f1: int,f2: string>, c2 struct<f1: int,f2: string>)
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out
index b65f64f..5cf76de 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out
@@ -6,11 +6,11 @@ POSTHOOK: query: CREATE TABLE test_list1 (key INT, value array<int>, col_1 STRIN
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_list1
-PREHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car')
+PREHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@test_list1
-POSTHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car')
+POSTHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_list1
@@ -25,11 +25,11 @@ POSTHOOK: query: CREATE TABLE test_list2 (key INT, value array<int>, col_2 STRIN
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_list2
-PREHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli')
+PREHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@test_list2
-POSTHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli')
+POSTHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_list2
@@ -63,14 +63,14 @@ STAGE PLANS:
TableScan
alias: test_list1
filterExpr: value is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: value is not null (type: boolean)
- Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value (type: array<int>), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
@@ -78,10 +78,10 @@ STAGE PLANS:
0 _col1 (type: array<int>)
1 _col1 (type: array<int>)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 4 Data size: 1355 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1694 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 1355 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 1694 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -107,6 +107,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_list1
POSTHOOK: Input: default@test_list2
#### A masked pattern was here ####
+102 [1,2,3,4] Mallory 105 [1,2,3,4] Victor
101 [2,2] Car 103 [2,2] Ema
101 [2,2] Car 102 [2,2] Del
99 [2,2] Mat 103 [2,2] Ema