You are viewing a plain text version of this content. (The link to the canonical version was not preserved in this plain-text copy.)
Posted to commits@hive.apache.org by kg...@apache.org on 2019/09/18 06:45:27 UTC

[hive] branch master updated: HIVE-22210: Vectorization may reuse computation output columns involved in filtering (Zoltan Haindrich reviewed by Laszlo Bodor)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 79e7d2c  HIVE-22210: Vectorization may reuse computation output columns involved in filtering (Zoltan Haindrich reviewed by Laszlo Bodor)
79e7d2c is described below

commit 79e7d2c1bd58f6cc8d0bb52fda3a9018b958f716
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Wed Sep 18 08:43:11 2019 +0200

    HIVE-22210: Vectorization may reuse computation output columns involved in filtering (Zoltan Haindrich reviewed by Laszlo Bodor)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
---
 .../test/resources/testconfiguration.properties    |   1 +
 .../hive/ql/exec/vector/VectorizationContext.java  |   4 +-
 .../queries/clientpositive/vector_identity_reuse.q |  80 ++++
 .../llap/vector_identity_reuse.q.out               | 489 +++++++++++++++++++++
 4 files changed, 571 insertions(+), 3 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 40f5de1..eb7bcab 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -806,6 +806,7 @@ minillaplocal.query.files=\
   union_remove_26.q,\
   union_top_level.q,\
   update_access_time_non_current_db.q, \
+  vector_identity_reuse.q,\
   vector_acid4.q,\
   vector_and_or_scalar_col.q,\
   vector_annotate_stats_select.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 94fc4d4..8f3291a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2125,9 +2125,7 @@ import com.google.common.annotations.VisibleForTesting;
       vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
     }
 
-    for (VectorExpression ve : children) {
-      ocm.freeOutputColumn(ve.getOutputColumnNum());
-    }
+    freeNonColumns(children.toArray(new VectorExpression[0]));
 
     return vectorExpression;
   }
diff --git a/ql/src/test/queries/clientpositive/vector_identity_reuse.q b/ql/src/test/queries/clientpositive/vector_identity_reuse.q
new file mode 100644
index 0000000..0249069
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_identity_reuse.q
@@ -0,0 +1,80 @@
+drop table if exists  u_table_4;
+
+create table u_table_4(smallint_col_22 smallint, int_col_5 int);
+insert into u_table_4 values(238,922);
+
+drop table u_table_7;
+create table u_table_7 ( bigint_col_3 bigint, int_col_10 int);
+insert into u_table_7 values (571,198);
+
+drop table u_table_19;
+create table u_table_19 (bigint_col_18 bigint ,int_col_19 int, STRING_COL_7 string);
+insert into u_table_19 values (922,5,'500');
+
+
+set hive.mapjoin.full.outer=true;
+set hive.auto.convert.join=true;
+set hive.query.results.cache.enabled=false;
+set hive.merge.nway.joins=true;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.reuse.scratch.columns=true;
+
+explain vectorization detail
+SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+;
+
+
+SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+;
diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out
new file mode 100644
index 0000000..52d67a7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out
@@ -0,0 +1,489 @@
+PREHOOK: query: drop table if exists  u_table_4
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists  u_table_4
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table u_table_4(smallint_col_22 smallint, int_col_5 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@u_table_4
+POSTHOOK: query: create table u_table_4(smallint_col_22 smallint, int_col_5 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@u_table_4
+PREHOOK: query: insert into u_table_4 values(238,922)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@u_table_4
+POSTHOOK: query: insert into u_table_4 values(238,922)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@u_table_4
+POSTHOOK: Lineage: u_table_4.int_col_5 SCRIPT []
+POSTHOOK: Lineage: u_table_4.smallint_col_22 SCRIPT []
+PREHOOK: query: drop table u_table_7
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table u_table_7
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table u_table_7 ( bigint_col_3 bigint, int_col_10 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@u_table_7
+POSTHOOK: query: create table u_table_7 ( bigint_col_3 bigint, int_col_10 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@u_table_7
+PREHOOK: query: insert into u_table_7 values (571,198)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@u_table_7
+POSTHOOK: query: insert into u_table_7 values (571,198)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@u_table_7
+POSTHOOK: Lineage: u_table_7.bigint_col_3 SCRIPT []
+POSTHOOK: Lineage: u_table_7.int_col_10 SCRIPT []
+PREHOOK: query: drop table u_table_19
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table u_table_19
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table u_table_19 (bigint_col_18 bigint ,int_col_19 int, STRING_COL_7 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@u_table_19
+POSTHOOK: query: create table u_table_19 (bigint_col_18 bigint ,int_col_19 int, STRING_COL_7 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@u_table_19
+PREHOOK: query: insert into u_table_19 values (922,5,'500')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@u_table_19
+POSTHOOK: query: insert into u_table_19 values (922,5,'500')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@u_table_19
+POSTHOOK: Lineage: u_table_19.bigint_col_18 SCRIPT []
+POSTHOOK: Lineage: u_table_19.int_col_19 SCRIPT []
+POSTHOOK: Lineage: u_table_19.string_col_7 SCRIPT []
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: explain vectorization detail
+SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@u_table_19
+PREHOOK: Input: default@u_table_4
+PREHOOK: Input: default@u_table_7
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail
+SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@u_table_19
+POSTHOOK: Input: default@u_table_4
+POSTHOOK: Input: default@u_table_7
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a1
+                  filterExpr: (string_col_7 = '500') (type: boolean)
+                  Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:bigint_col_18:bigint, 1:int_col_19:int, 2:string_col_7:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterStringGroupColEqualStringScalar(col 2:string, val 500)
+                    predicate: (string_col_7 = '500') (type: boolean)
+                    Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: bigint_col_18 (type: bigint), int_col_19 (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: bigint)
+                          1 _col3 (type: bigint)
+                        Map Join Vectorization:
+                            bigTableKeyColumns: 0:bigint
+                            bigTableRetainColumnNums: [1]
+                            bigTableValueColumns: 1:int
+                            className: VectorMapJoinOuterLongOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+                            projectedOutput: 1:int, 4:int, 5:smallint, 6:int
+                            smallTableValueMapping: 4:int, 5:smallint, 6:int
+                            hashTableImplementationType: OPTIMIZED
+                        outputColumnNames: _col1, _col2, _col3, _col4
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                        Filter Operator
+                          Filter Vectorization:
+                              className: VectorFilterOperator
+                              native: true
+                              predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 6:int, val 10), FilterLongColEqualLongScalar(col 6:bigint, val 571)(children: col 6:int))
+                          predicate: ((_col1 = 5) or (_col4 = 10) or (UDFToLong(_col4) = 571L)) (type: boolean)
+                          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                          Select Operator
+                            expressions: _col2 (type: int), _col3 (type: smallint), _col4 (type: int)
+                            outputColumnNames: _col0, _col1, _col2
+                            Select Vectorization:
+                                className: VectorSelectOperator
+                                native: true
+                                projectedOutputColumnNums: [4, 5, 6]
+                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                            Map Join Operator
+                              condition map:
+                                   Inner Join 0 to 1
+                              keys:
+                                0 
+                                1 
+                              Map Join Vectorization:
+                                  bigTableRetainColumnNums: [4, 5, 6]
+                                  bigTableValueColumns: 4:int, 5:smallint, 6:int
+                                  className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+                                  native: true
+                                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                                  nonOuterSmallTableKeyMapping: []
+                                  projectedOutput: 4:int, 5:smallint, 6:int
+                                  hashTableImplementationType: OPTIMIZED
+                              outputColumnNames: _col0, _col1, _col2
+                              input vertices:
+                                1 Map 4
+                              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                              Select Operator
+                                expressions: _col0 (type: int), CASE WHEN (_col0 is not null) THEN (_col0) ELSE (UDFToInteger(_col1)) END (type: int), _col2 (type: int)
+                                outputColumnNames: _col0, _col1, _col2
+                                Select Vectorization:
+                                    className: VectorSelectOperator
+                                    native: true
+                                    projectedOutputColumnNums: [4, 8, 6]
+                                    selectExpressions: IfExprColumnCondExpr(col 7:boolean, col 4:intcol 5:smallint)(children: IsNotNull(col 4:int) -> 7:boolean, col 4:int, col 5:smallint) -> 8:int
+                                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                                Reduce Output Operator
+                                  key expressions: _col0 (type: int)
+                                  sort order: -
+                                  Reduce Sink Vectorization:
+                                      className: VectorReduceSinkObjectHashOperator
+                                      keyColumns: 4:int
+                                      native: true
+                                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                      valueColumns: 8:int, 6:int
+                                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                                  TopN Hash Memory Usage: 0.1
+                                  value expressions: _col1 (type: int), _col2 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: bigint_col_18:bigint, int_col_19:int, string_col_7:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint]
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: a2
+                  filterExpr: UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) is not null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:smallint_col_22:smallint, 1:int_col_5:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 4:bigint)(children: IfExprColumnCondExpr(col 3:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 3:boolean, col 1:int, col 0:smallint) -> 4:int)
+                    predicate: UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: int_col_5 (type: int), smallint_col_22 (type: smallint), CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END (type: int), UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) (type: bigint)
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [1, 0, 6, 8]
+                          selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 5:boolean, col 1:int, col 0:smallint) -> 6:int, IfExprColumnCondExpr(col 7:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 7:boolean, col 1:int, col 0:smallint) -> 8:int
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col3 (type: bigint)
+                        sort order: +
+                        Map-reduce partition columns: _col3 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumns: 8:bigint
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumns: 1:int, 0:smallint, 6:int
+                        Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: smallint_col_22:smallint, int_col_5:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint]
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: a3
+                  filterExpr: (bigint_col_3 = 571L) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:bigint_col_3:bigint, 1:int_col_10:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColEqualLongScalar(col 0:bigint, val 571)
+                    predicate: (bigint_col_3 = 571L) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: []
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkEmptyKeyOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: bigint_col_3:bigint, int_col_10:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: z
+                reduceColumnSortOrder: -
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 100
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: int), 922 (type: int), _col1 (type: int), _col2 (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 3, 1, 2]
+                        selectExpressions: ConstantVectorExpression(val 922) -> 3:int
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@u_table_19
+PREHOOK: Input: default@u_table_4
+PREHOOK: Input: default@u_table_7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ 	a5.int_col,
+  922 as expected,
+  COALESCE(a5.int_col, a5.aa) as expected2,
+  a5.int_col_3 as reality
+FROM            u_table_19 a1 
+FULL OUTER JOIN 
+                ( 
+                       SELECT a2.int_col_5 AS int_col, 
+                    				  a2.smallint_col_22 as aa,
+                              COALESCE(a2.int_col_5, a2.smallint_col_22) AS int_col_3 
+                       FROM   u_table_4 a2
+				) a5 
+ON              ( 
+                                a1.bigint_col_18) = (a5.int_col_3) 
+INNER JOIN 
+                ( 
+                         SELECT   a3.bigint_col_3                                                                                               AS int_col,
+                                  Cast (COALESCE(a3.bigint_col_3, a3.bigint_col_3, a3.int_col_10) AS BIGINT) * Cast (a3.bigint_col_3 AS BIGINT) AS int_col_3
+                         FROM     u_table_7 a3 
+                         WHERE    bigint_col_3=571 
+                ) a4
+ON              (a1.int_col_19=5) 
+OR              ((a5.int_col_3) IN (a4.int_col, 10)) 
+where
+  a1.STRING_COL_7='500'
+ORDER BY        int_col DESC nulls last limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@u_table_19
+POSTHOOK: Input: default@u_table_4
+POSTHOOK: Input: default@u_table_7
+#### A masked pattern was here ####
+922	922	922	922