You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by yc...@apache.org on 2016/04/25 01:56:59 UTC

[2/2] hive git commit: HIVE-13570: Some queries with Union all fail when CBO is off (Yongzhi Chen, reviewed by Chaoyu Tang)

HIVE-13570: Some queries with Union all fail when CBO is off (Yongzhi Chen, reviewed by Chaoyu Tang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/413645d1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/413645d1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/413645d1

Branch: refs/heads/master
Commit: 413645d1b7e5597aa882cc4a23e664693707ae21
Parents: a42bc67
Author: Yongzhi Chen <yc...@apache.org>
Authored: Wed Apr 20 20:52:18 2016 -0400
Committer: Yongzhi Chen <yc...@apache.org>
Committed: Sun Apr 24 19:54:47 2016 -0400

----------------------------------------------------------------------
 .../hive/ql/optimizer/ColumnPrunerProcCtx.java  |    2 +-
 .../test/queries/clientpositive/union_offcbo.q  |  483 +++++
 .../results/clientpositive/union_offcbo.q.out   | 1779 ++++++++++++++++++
 3 files changed, 2263 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/413645d1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
index 7befd3b..611a6b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcCtx.java
@@ -277,7 +277,7 @@ public class ColumnPrunerProcCtx implements NodeProcessorCtx {
 
     for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
       if (child instanceof UnionOperator) {
-        prunList = genColLists(curOp, child);
+        prunList = genColLists(child);
         if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
           continue;
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/413645d1/ql/src/test/queries/clientpositive/union_offcbo.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_offcbo.q b/ql/src/test/queries/clientpositive/union_offcbo.q
new file mode 100644
index 0000000..66d4bee
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_offcbo.q
@@ -0,0 +1,483 @@
+set hive.cbo.enable=false;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.optimize.ppd=true;
+
+DROP TABLE IF EXISTS ttest1;
+DROP TABLE IF EXISTS ttest2;
+CREATE TABLE ttest1 (
+  `id1` bigint COMMENT 'from deserializer',
+  `ts1` string COMMENT 'from deserializer',
+  `dt1` string COMMENT 'from deserializer',
+  `dt2` string COMMENT 'from deserializer',
+  `ac1` string COMMENT 'from deserializer',
+  `kd1` string COMMENT 'from deserializer',
+  `sts` string COMMENT 'from deserializer',
+  `at1` bigint COMMENT 'from deserializer');
+
+CREATE TABLE ttest2 (
+  `id1` bigint,
+  `ts1` string,
+  `dt1` string,
+  `dt2` string,
+  `ac1` string,
+  `kd1` string,
+  `sts` string,
+  `at1` bigint,
+  `khash` string,
+  `rhash` string);
+
+explain SELECT
+  A2.id1, A2.sts,A2.at1,
+    CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+        ,A2.FLAG
+  FROM (
+   SELECT
+  A2.id1, A2.sts,A2.at1
+        ,A2.dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+    ,CASE
+     WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+         AND A2.dt1 >= '2016-02-05'
+     THEN 'DEL'
+
+         WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.dt1 <= '2016-02-05'
+     THEN 'RET'
+
+     WHEN
+     (
+     A2.khash = A1.khash
+     AND A2.rhash <> A1.rhash
+     )
+     THEN 'A_INS'
+
+     ELSE 'NA'
+    END FLAG
+        FROM (
+                SELECT *
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) khash
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+                FROM ttest1
+                WHERE ts1 = '2015-11-20'
+                ) A1
+        FULL OUTER JOIN (
+                SELECT *
+                FROM ttest2
+                WHERE '2015-11-20' BETWEEN dt1 AND dt2
+                ) A2
+     ON A1.khash = A2.khash
+     WHERE NOT (
+        NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+        AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+        )
+     AND A2.khash IS NOT NULL
+
+   UNION ALL
+
+   SELECT A1.id1, A1.sts,A1.at1
+        ,A1.dt1
+                , '2099-12-31' dt2
+        ,A1.khash
+        ,A1.rhash
+     ,CASE WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.ts1 <= A1.ts1
+     THEN 'DEL'
+
+     WHEN ( A2.khash IS NULL AND A1.khash IS NOT NULL )
+     OR ( A2.khash = A1.khash AND A2.rhash <> A1.rhash ) THEN 'INS' ELSE 'NA' END FLAG
+   FROM (
+     SELECT *
+        ,reflect('org.apache.commons.codec.digest.DigestUtils', 'sha256Hex',concat(id1)) khash
+        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+      FROM ttest1
+     WHERE ts1 = '2015-11-20'
+    ) A1
+   FULL OUTER JOIN (
+        SELECT *
+        FROM ttest2
+        WHERE '2015-11-20' BETWEEN dt1
+          AND dt2
+       ) A2 ON A1.khash = A2.khash
+       WHERE NOT (
+          NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+          AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+          )
+       AND A1.khash IS NOT NULL
+   ) A2
+   where a2.flag <> 'RET';
+
+set hive.cbo.enable=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.optimize.ppd=true;
+
+explain SELECT
+  A2.id1, A2.sts,A2.at1,
+    CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+        ,A2.FLAG
+  FROM (
+   SELECT
+  A2.id1, A2.sts,A2.at1
+        ,A2.dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+    ,CASE
+     WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+         AND A2.dt1 >= '2016-02-05'
+     THEN 'DEL'
+
+         WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.dt1 <= '2016-02-05'
+     THEN 'RET'
+
+     WHEN
+     (
+     A2.khash = A1.khash
+     AND A2.rhash <> A1.rhash
+     )
+     THEN 'A_INS'
+
+     ELSE 'NA'
+    END FLAG
+        FROM (
+                SELECT *
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) khash
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+                FROM ttest1
+                WHERE ts1 = '2015-11-20'
+                ) A1
+        FULL OUTER JOIN (
+                SELECT *
+                FROM ttest2
+                WHERE '2015-11-20' BETWEEN dt1 AND dt2
+                ) A2
+     ON A1.khash = A2.khash
+     WHERE NOT (
+        NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+        AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+        )
+     AND A2.khash IS NOT NULL
+
+   UNION ALL
+
+   SELECT A1.id1, A1.sts,A1.at1
+        ,A1.dt1
+                , '2099-12-31' dt2
+        ,A1.khash
+        ,A1.rhash
+     ,CASE WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.ts1 <= A1.ts1
+     THEN 'DEL'
+
+     WHEN ( A2.khash IS NULL AND A1.khash IS NOT NULL )
+     OR ( A2.khash = A1.khash AND A2.rhash <> A1.rhash ) THEN 'INS' ELSE 'NA' END FLAG
+   FROM (
+     SELECT *
+        ,reflect('org.apache.commons.codec.digest.DigestUtils', 'sha256Hex',concat(id1)) khash
+        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+      FROM ttest1
+     WHERE ts1 = '2015-11-20'
+    ) A1
+   FULL OUTER JOIN (
+        SELECT *
+        FROM ttest2
+        WHERE '2015-11-20' BETWEEN dt1
+          AND dt2
+       ) A2 ON A1.khash = A2.khash
+       WHERE NOT (
+          NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+          AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+          )
+       AND A1.khash IS NOT NULL
+   ) A2
+   where a2.flag <> 'RET';
+
+set hive.cbo.enable=false;
+set hive.ppd.remove.duplicatefilters=false;
+set hive.optimize.ppd=true;
+
+explain SELECT
+  A2.id1, A2.sts,A2.at1,
+    CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+        ,A2.FLAG
+  FROM (
+   SELECT
+  A2.id1, A2.sts,A2.at1
+        ,A2.dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+    ,CASE
+     WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+         AND A2.dt1 >= '2016-02-05'
+     THEN 'DEL'
+
+         WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.dt1 <= '2016-02-05'
+     THEN 'RET'
+
+     WHEN
+     (
+     A2.khash = A1.khash
+     AND A2.rhash <> A1.rhash
+     )
+     THEN 'A_INS'
+
+     ELSE 'NA'
+    END FLAG
+        FROM (
+                SELECT *
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) khash
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+                FROM ttest1
+                WHERE ts1 = '2015-11-20'
+                ) A1
+        FULL OUTER JOIN (
+                SELECT *
+                FROM ttest2
+                WHERE '2015-11-20' BETWEEN dt1 AND dt2
+                ) A2
+     ON A1.khash = A2.khash
+     WHERE NOT (
+        NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+        AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+        )
+     AND A2.khash IS NOT NULL
+
+   UNION ALL
+
+   SELECT A1.id1, A1.sts,A1.at1
+        ,A1.dt1
+                , '2099-12-31' dt2
+        ,A1.khash
+        ,A1.rhash
+     ,CASE WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.ts1 <= A1.ts1
+     THEN 'DEL'
+
+     WHEN ( A2.khash IS NULL AND A1.khash IS NOT NULL )
+     OR ( A2.khash = A1.khash AND A2.rhash <> A1.rhash ) THEN 'INS' ELSE 'NA' END FLAG
+   FROM (
+     SELECT *
+        ,reflect('org.apache.commons.codec.digest.DigestUtils', 'sha256Hex',concat(id1)) khash
+        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+      FROM ttest1
+     WHERE ts1 = '2015-11-20'
+    ) A1
+   FULL OUTER JOIN (
+        SELECT *
+        FROM ttest2
+        WHERE '2015-11-20' BETWEEN dt1
+          AND dt2
+       ) A2 ON A1.khash = A2.khash
+       WHERE NOT (
+          NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+          AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+          )
+       AND A1.khash IS NOT NULL
+   ) A2
+   where a2.flag <> 'RET';
+
+set hive.cbo.enable=false;
+set hive.optimize.ppd=false;
+explain SELECT
+  A2.id1, A2.sts,A2.at1,
+    CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+        ,A2.FLAG
+  FROM (
+   SELECT
+  A2.id1, A2.sts,A2.at1
+        ,A2.dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+    ,CASE
+     WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+         AND A2.dt1 >= '2016-02-05'
+     THEN 'DEL'
+
+         WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.dt1 <= '2016-02-05'
+     THEN 'RET'
+
+     WHEN
+     (
+     A2.khash = A1.khash
+     AND A2.rhash <> A1.rhash
+     )
+     THEN 'A_INS'
+
+     ELSE 'NA'
+    END FLAG
+        FROM (
+                SELECT *
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) khash
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+                FROM ttest1
+                WHERE ts1 = '2015-11-20'
+                ) A1
+        FULL OUTER JOIN (
+                SELECT *
+                FROM ttest2
+                WHERE '2015-11-20' BETWEEN dt1 AND dt2
+                ) A2
+     ON A1.khash = A2.khash
+     WHERE NOT (
+        NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+        AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+        )
+     AND A2.khash IS NOT NULL
+
+   UNION ALL
+
+   SELECT A1.id1, A1.sts,A1.at1
+        ,A1.dt1
+                , '2099-12-31' dt2
+        ,A1.khash
+        ,A1.rhash
+     ,CASE WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.ts1 <= A1.ts1
+     THEN 'DEL'
+
+     WHEN ( A2.khash IS NULL AND A1.khash IS NOT NULL )
+     OR ( A2.khash = A1.khash AND A2.rhash <> A1.rhash ) THEN 'INS' ELSE 'NA' END FLAG
+   FROM (
+     SELECT *
+        ,reflect('org.apache.commons.codec.digest.DigestUtils', 'sha256Hex',concat(id1)) khash
+        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+      FROM ttest1
+     WHERE ts1 = '2015-11-20'
+    ) A1
+   FULL OUTER JOIN (
+        SELECT *
+        FROM ttest2
+        WHERE '2015-11-20' BETWEEN dt1
+          AND dt2
+       ) A2 ON A1.khash = A2.khash
+       WHERE NOT (
+          NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+          AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+          )
+       AND A1.khash IS NOT NULL
+   ) A2
+   where a2.flag <> 'RET';
+
+set hive.cbo.enable=true;
+set hive.optimize.ppd=false;
+explain SELECT
+  A2.id1, A2.sts,A2.at1,
+    CASE WHEN FLAG = 'A_INS' THEN date_add('2015-11-20', 1) ELSE '2015-11-20' END dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+        ,A2.FLAG
+  FROM (
+   SELECT
+  A2.id1, A2.sts,A2.at1
+        ,A2.dt1
+        ,A2.dt2
+        ,A2.khash
+        ,A2.rhash
+    ,CASE
+     WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+         AND A2.dt1 >= '2016-02-05'
+     THEN 'DEL'
+
+         WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.dt1 <= '2016-02-05'
+     THEN 'RET'
+
+     WHEN
+     (
+     A2.khash = A1.khash
+     AND A2.rhash <> A1.rhash
+     )
+     THEN 'A_INS'
+
+     ELSE 'NA'
+    END FLAG
+        FROM (
+                SELECT *
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(id1)) khash
+                        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+                FROM ttest1
+                WHERE ts1 = '2015-11-20'
+                ) A1
+        FULL OUTER JOIN (
+                SELECT *
+                FROM ttest2
+                WHERE '2015-11-20' BETWEEN dt1 AND dt2
+                ) A2
+     ON A1.khash = A2.khash
+     WHERE NOT (
+        NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+        AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+        )
+     AND A2.khash IS NOT NULL
+
+   UNION ALL
+
+   SELECT A1.id1, A1.sts,A1.at1
+        ,A1.dt1
+                , '2099-12-31' dt2
+        ,A1.khash
+        ,A1.rhash
+     ,CASE WHEN A2.khash IS NOT NULL
+     AND A1.khash IS NULL
+     AND A2.ts1 <= A1.ts1
+     THEN 'DEL'
+
+     WHEN ( A2.khash IS NULL AND A1.khash IS NOT NULL )
+     OR ( A2.khash = A1.khash AND A2.rhash <> A1.rhash ) THEN 'INS' ELSE 'NA' END FLAG
+   FROM (
+     SELECT *
+        ,reflect('org.apache.commons.codec.digest.DigestUtils', 'sha256Hex',concat(id1)) khash
+        ,reflect('org.apache.commons.codec.digest.DigestUtils','sha256Hex',concat(at1)) rhash
+      FROM ttest1
+     WHERE ts1 = '2015-11-20'
+    ) A1
+   FULL OUTER JOIN (
+        SELECT *
+        FROM ttest2
+        WHERE '2015-11-20' BETWEEN dt1
+          AND dt2
+       ) A2 ON A1.khash = A2.khash
+       WHERE NOT (
+          NVL(A1.khash, - 1) = NVL(A2.khash, - 1)
+          AND NVL(A1.rhash, - 1) = NVL(A2.rhash, - 1)
+          )
+       AND A1.khash IS NOT NULL
+   ) A2
+   where a2.flag <> 'RET';
+
+DROP TABLE ttest1;
+DROP TABLE ttest2;
+