You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2023/03/22 11:22:12 UTC

[hive] branch master updated: HIVE-27124: Disable ANTI join transformation by default (Steve Carlin reviewed by Stamatis Zampetakis)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 33b02ebb317 HIVE-27124: Disable ANTI join transformation by default (Steve Carlin reviewed by Stamatis Zampetakis)
33b02ebb317 is described below

commit 33b02ebb317f261bfb2a9a6dde740fbe565c6946
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Tue Mar 7 12:51:13 2023 -0800

    HIVE-27124: Disable ANTI join transformation by default (Steve Carlin reviewed by Stamatis Zampetakis)
    
    Minimize the risk of having wrong results, such as HIVE-26659, by
    disabling the ANTI join transformation by default.
    
    Closes #4101
---
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java          | 7 +++++--
 ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q        | 1 +
 ql/src/test/queries/clientpositive/empty_result_outerjoin.q        | 1 +
 ql/src/test/queries/clientpositive/explainuser_1.q                 | 3 ++-
 ql/src/test/queries/clientpositive/lineage3.q                      | 1 +
 ql/src/test/queries/clientpositive/perf/cbo_query16.q              | 1 +
 ql/src/test/queries/clientpositive/perf/cbo_query69.q              | 1 +
 ql/src/test/queries/clientpositive/perf/cbo_query78.q              | 1 +
 ql/src/test/queries/clientpositive/perf/cbo_query94.q              | 1 +
 ql/src/test/queries/clientpositive/perf/query16.q                  | 1 +
 ql/src/test/queries/clientpositive/perf/query69.q                  | 1 +
 ql/src/test/queries/clientpositive/perf/query78.q                  | 1 +
 ql/src/test/queries/clientpositive/perf/query94.q                  | 1 +
 .../clientpositive/subquery_complex_correlation_predicates.q       | 4 +++-
 ql/src/test/queries/clientpositive/subquery_corr_join.q            | 3 ++-
 ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q    | 1 +
 ql/src/test/queries/clientpositive/subquery_in_having.q            | 1 +
 ql/src/test/queries/clientpositive/subquery_join_rewrite.q         | 2 ++
 ql/src/test/queries/clientpositive/subquery_multi.q                | 1 +
 ql/src/test/queries/clientpositive/subquery_notexists.q            | 1 +
 ql/src/test/queries/clientpositive/subquery_notexists_having.q     | 3 ++-
 21 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9376348a3e8..9f61c132bde 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2364,8 +2364,11 @@ public class HiveConf extends Configuration {
         "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" +
         "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" +
         "specified size, the join is directly converted to a mapjoin (there is no conditional task)."),
-    HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", true,
-        "Whether Hive enables the optimization about converting join with null filter to anti join"),
+    HIVE_CONVERT_ANTI_JOIN("hive.auto.convert.anti.join", false,
+        "Whether Hive enables the optimization about converting join with null filter to anti join. The " +
+        "default is currently false because of HIVE-26659. There is the possibility of incorrect results. " +
+        "The incorrect results shown on testing were zeros and nulls on aggregate tpcds queries so it was " +
+        "fairly obvious they were incorrect. But be wary when turning on this flag."),
     HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size",
         10000000L,
         "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" +
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
index 3be73fe000a..6f571a0ad18 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_10.q
@@ -4,6 +4,7 @@ set hive.explain.user=false;
 ;
 
 set hive.exec.reducers.max = 1;
+set hive.auto.convert.anti.join=true;
 
 CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
 CREATE TABLE tbl2_n4(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
diff --git a/ql/src/test/queries/clientpositive/empty_result_outerjoin.q b/ql/src/test/queries/clientpositive/empty_result_outerjoin.q
index 9199529b9e4..f1f75c2774a 100644
--- a/ql/src/test/queries/clientpositive/empty_result_outerjoin.q
+++ b/ql/src/test/queries/clientpositive/empty_result_outerjoin.q
@@ -1,4 +1,5 @@
 set hive.cli.print.header=true;
+set hive.auto.convert.anti.join=true;
 
 create table t1 (a int, b varchar(10));
 create table t2 (c int, d varchar(10));
diff --git a/ql/src/test/queries/clientpositive/explainuser_1.q b/ql/src/test/queries/clientpositive/explainuser_1.q
index 747cc994864..8d446057d69 100644
--- a/ql/src/test/queries/clientpositive/explainuser_1.q
+++ b/ql/src/test/queries/clientpositive/explainuser_1.q
@@ -12,6 +12,7 @@ set hive.strict.checks.bucketing=false;
 
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=true;
+set hive.auto.convert.anti.join=true;
 
 explain create table src_orc_merge_test_part_n1(key int, value string) partitioned by (ds string, ts string) stored as orc;
 create table src_orc_merge_test_part_n1(key int, value string) partitioned by (ds string, ts string) stored as orc;
@@ -741,4 +742,4 @@ from (select *
           where b.value = a.value  and a.key = b.key and a.value > 'val_9')
      ) a
 ;
-set hive.auto.convert.anti.join=true;
\ No newline at end of file
+set hive.auto.convert.anti.join=true;
diff --git a/ql/src/test/queries/clientpositive/lineage3.q b/ql/src/test/queries/clientpositive/lineage3.q
index 8d5bd30c055..fa59ebd1242 100644
--- a/ql/src/test/queries/clientpositive/lineage3.q
+++ b/ql/src/test/queries/clientpositive/lineage3.q
@@ -4,6 +4,7 @@ SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger;
 set hive.metastore.disallow.incompatible.col.type.changes=false;
+set hive.auto.convert.anti.join=true;
 drop table if exists d1;
 create table d1(a int);
 
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query16.q b/ql/src/test/queries/clientpositive/perf/cbo_query16.q
index 74245aa10b8..5ce0b3dfb81 100644
--- a/ql/src/test/queries/clientpositive/perf/cbo_query16.q
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query16.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query16.tpl and seed 171719422
 explain cbo
 select  
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query69.q b/ql/src/test/queries/clientpositive/perf/cbo_query69.q
index 01183fba762..c08c4f404c5 100644
--- a/ql/src/test/queries/clientpositive/perf/cbo_query69.q
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query69.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query69.tpl and seed 797269820
 explain cbo
 select  
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query78.q b/ql/src/test/queries/clientpositive/perf/cbo_query78.q
index 87f94dcc4a7..dafb1e5eeae 100644
--- a/ql/src/test/queries/clientpositive/perf/cbo_query78.q
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query78.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query78.tpl and seed 1819994127
 explain cbo
 with ws as
diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query94.q b/ql/src/test/queries/clientpositive/perf/cbo_query94.q
index 4ae01bf27e5..59ba68f14b0 100644
--- a/ql/src/test/queries/clientpositive/perf/cbo_query94.q
+++ b/ql/src/test/queries/clientpositive/perf/cbo_query94.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query94.tpl and seed 2031708268
 explain cbo
 select  
diff --git a/ql/src/test/queries/clientpositive/perf/query16.q b/ql/src/test/queries/clientpositive/perf/query16.q
index 05625f71aad..725d849cc7e 100644
--- a/ql/src/test/queries/clientpositive/perf/query16.q
+++ b/ql/src/test/queries/clientpositive/perf/query16.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query16.tpl and seed 171719422
 explain
 select  
diff --git a/ql/src/test/queries/clientpositive/perf/query69.q b/ql/src/test/queries/clientpositive/perf/query69.q
index ce2d19cc5d0..520b3722c3f 100644
--- a/ql/src/test/queries/clientpositive/perf/query69.q
+++ b/ql/src/test/queries/clientpositive/perf/query69.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query69.tpl and seed 797269820
 explain
 select  
diff --git a/ql/src/test/queries/clientpositive/perf/query78.q b/ql/src/test/queries/clientpositive/perf/query78.q
index ca9e6d6cb1a..b15bb5210b4 100644
--- a/ql/src/test/queries/clientpositive/perf/query78.q
+++ b/ql/src/test/queries/clientpositive/perf/query78.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query78.tpl and seed 1819994127
 explain
 with ws as
diff --git a/ql/src/test/queries/clientpositive/perf/query94.q b/ql/src/test/queries/clientpositive/perf/query94.q
index 18253fa7d63..6a68e9b9e2a 100644
--- a/ql/src/test/queries/clientpositive/perf/query94.q
+++ b/ql/src/test/queries/clientpositive/perf/query94.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- start query 1 in stream 0 using template query94.tpl and seed 2031708268
 explain
 select  
diff --git a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
index a32eb914170..ccfed8dcb23 100644
--- a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
+++ b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
@@ -1,5 +1,7 @@
 -- Test cases with subqueries having complex correlation predicates. 
 
+set hive.auto.convert.anti.join=true;
+
 -- HIVE-24957: Wrong results when subquery has COALESCE in correlation predicate
 create table author
 (
@@ -75,4 +77,4 @@ from book b
 where not exists
           (select a_authorkey
            from author a
-           where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400));
\ No newline at end of file
+           where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400));
diff --git a/ql/src/test/queries/clientpositive/subquery_corr_join.q b/ql/src/test/queries/clientpositive/subquery_corr_join.q
index cb6e82fde07..b20cf102974 100644
--- a/ql/src/test/queries/clientpositive/subquery_corr_join.q
+++ b/ql/src/test/queries/clientpositive/subquery_corr_join.q
@@ -1,3 +1,4 @@
+set hive.auto.convert.anti.join=true;
 create table alltypestiny(
 id int,
 int_col int,
@@ -70,4 +71,4 @@ from ta
 where not exists
     (select 1
      from tb inner JOIN tc
-     on ta.id = tb.id);
\ No newline at end of file
+     on ta.id = tb.id);
diff --git a/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q b/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q
index 352f1e8fefb..3f1a2c47e5c 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists_windowfunc.q
@@ -1,3 +1,4 @@
+set hive.auto.convert.anti.join=true;
 create table alltypestiny(
 id int,
 int_col int,
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index 2861c4e3e26..acb3c23da8f 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -1,5 +1,6 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 -- SORT_QUERY_RESULTS
 
 -- data setup
diff --git a/ql/src/test/queries/clientpositive/subquery_join_rewrite.q b/ql/src/test/queries/clientpositive/subquery_join_rewrite.q
index 1f4ba715631..ae7c2762d2d 100644
--- a/ql/src/test/queries/clientpositive/subquery_join_rewrite.q
+++ b/ql/src/test/queries/clientpositive/subquery_join_rewrite.q
@@ -1,5 +1,7 @@
 -- SORT_QUERY_RESULTS
 
+set hive.auto.convert.anti.join=true;
+
 create table web_sales (ws_order_number int, ws_warehouse_sk int) stored as orc;
 
 insert into web_sales values
diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q
index 7389384f20b..40e9e251ee5 100644
--- a/ql/src/test/queries/clientpositive/subquery_multi.q
+++ b/ql/src/test/queries/clientpositive/subquery_multi.q
@@ -3,6 +3,7 @@
 --! qt:dataset:lineitem
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.auto.convert.anti.join=true;
 
 -- SORT_QUERY_RESULTS
 
diff --git a/ql/src/test/queries/clientpositive/subquery_notexists.q b/ql/src/test/queries/clientpositive/subquery_notexists.q
index 741366751e7..c4707e62312 100644
--- a/ql/src/test/queries/clientpositive/subquery_notexists.q
+++ b/ql/src/test/queries/clientpositive/subquery_notexists.q
@@ -2,6 +2,7 @@
 --! qt:dataset:part
 -- SORT_QUERY_RESULTS
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 
 
 -- no agg, corr
diff --git a/ql/src/test/queries/clientpositive/subquery_notexists_having.q b/ql/src/test/queries/clientpositive/subquery_notexists_having.q
index cce179a7c7b..fefb9ed4cf1 100644
--- a/ql/src/test/queries/clientpositive/subquery_notexists_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_notexists_having.q
@@ -1,5 +1,6 @@
 --! qt:dataset:src
 set hive.mapred.mode=nonstrict;
+set hive.auto.convert.anti.join=true;
 
 
 -- no agg, corr
@@ -45,4 +46,4 @@ having not exists
   from src a 
   where b.value = a.value and a.value > 'val_12'
   )
-;
\ No newline at end of file
+;