You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2019/11/20 12:45:04 UTC

[hive] branch master updated: HIVE-22513: Constant propagation of casted column in filter ops can cause incorrect results (Adam Szita, reviewed by Zoltan Haindrich, Peter Vary)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


View the commit online:
https://github.com/apache/hive/commit/df8e185aeb555f522345d5703cd5375aad2ae4b4

The following commit(s) were added to refs/heads/master by this push:
     new df8e185  HIVE-22513: Constant propagation of casted column in filter ops can cause incorrect results (Adam Szita, reviewed by Zoltan Haindrich, Peter Vary)
df8e185 is described below

commit df8e185aeb555f522345d5703cd5375aad2ae4b4
Author: Adam Szita <sz...@cloudera.com>
AuthorDate: Tue Nov 19 13:47:12 2019 +0100

    HIVE-22513: Constant propagation of casted column in filter ops can cause incorrect results (Adam Szita, reviewed by Zoltan Haindrich, Peter Vary)
---
 .../ql/optimizer/ConstantPropagateProcFactory.java |  15 +-
 .../test/queries/clientpositive/constprog_cast.q   |  21 ++
 .../results/clientpositive/constprog_cast.q.out    | 221 +++++++++++++++++++++
 3 files changed, 251 insertions(+), 6 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
index c1981e6..55eb9d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
@@ -530,12 +530,15 @@ public final class ConstantPropagateProcFactory {
         return;
       }
       // If both sides are constants, there is nothing to propagate
-      ExprNodeColumnDesc c = ExprNodeDescUtils.getColumnExpr(lOperand);
-      if (null == c) {
-        c = ExprNodeDescUtils.getColumnExpr(rOperand);
-      }
-      if (null == c) {
-        // we need a column expression on other side.
+      ExprNodeColumnDesc c;
+      if (lOperand instanceof ExprNodeColumnDesc) {
+        c = (ExprNodeColumnDesc)lOperand;
+      } else if (rOperand instanceof ExprNodeColumnDesc) {
+        c = (ExprNodeColumnDesc)rOperand;
+      } else {
+        // we need a column expression on other side
+        // NOTE: we also cannot rely on column expressions wrapped inside casts as casting might
+        // truncate information
         return;
       }
       ColumnInfo ci = resolveColumn(rs, c);
diff --git a/ql/src/test/queries/clientpositive/constprog_cast.q b/ql/src/test/queries/clientpositive/constprog_cast.q
new file mode 100644
index 0000000..765cd85
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/constprog_cast.q
@@ -0,0 +1,21 @@
+set hive.optimize.constant.propagation=true;
+
+CREATE TABLE constcasttest (id string);
+INSERT INTO constcasttest values('2019-11-05 01:01:11');
+
+set hive.cbo.enable=true;
+
+EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0';
+SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0';
+
+EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0';
+SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0';
+
+set hive.cbo.enable=false;
+
+EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0';
+SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0';
+
+EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0';
+SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0';
+
diff --git a/ql/src/test/results/clientpositive/constprog_cast.q.out b/ql/src/test/results/clientpositive/constprog_cast.q.out
new file mode 100644
index 0000000..3f76604
--- /dev/null
+++ b/ql/src/test/results/clientpositive/constprog_cast.q.out
@@ -0,0 +1,221 @@
+PREHOOK: query: CREATE TABLE constcasttest (id string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@constcasttest
+POSTHOOK: query: CREATE TABLE constcasttest (id string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@constcasttest
+PREHOOK: query: INSERT INTO constcasttest values('2019-11-05 01:01:11')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@constcasttest
+POSTHOOK: query: INSERT INTO constcasttest values('2019-11-05 01:01:11')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@constcasttest
+POSTHOOK: Lineage: constcasttest.id SCRIPT []
+PREHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: constcasttest
+            filterExpr: (CAST( CAST( id AS varchar(9)) AS STRING) = '2019-11-0') (type: boolean)
+            Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (CAST( CAST( id AS varchar(9)) AS STRING) = '2019-11-0') (type: boolean)
+              Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: id (type: string), CAST( id AS varchar(10)) (type: varchar(10))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+2019-11-05 01:01:11	2019-11-05
+PREHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: constcasttest
+            filterExpr: (substr(id, 0, 9) = '2019-11-0') (type: boolean)
+            Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (substr(id, 0, 9) = '2019-11-0') (type: boolean)
+              Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: id (type: string), CAST( id AS varchar(10)) (type: varchar(10))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+2019-11-05 01:01:11	2019-11-05
+PREHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: constcasttest
+            filterExpr: (CAST( id AS varchar(9)) = '2019-11-0') (type: boolean)
+            Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (CAST( id AS varchar(9)) = '2019-11-0') (type: boolean)
+              Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: id (type: string), CAST( id AS varchar(10)) (type: varchar(10))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE CAST(id AS VARCHAR(9)) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+2019-11-05 01:01:11	2019-11-05
+PREHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: constcasttest
+            filterExpr: (substr(id, 0, 9) = '2019-11-0') (type: boolean)
+            Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (substr(id, 0, 9) = '2019-11-0') (type: boolean)
+              Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: id (type: string), CAST( id AS varchar(10)) (type: varchar(10))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT id, CAST(id AS VARCHAR(10)) FROM constcasttest WHERE SUBSTR(id, 0, 9) = '2019-11-0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@constcasttest
+#### A masked pattern was here ####
+2019-11-05 01:01:11	2019-11-05