You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/05/11 01:17:14 UTC

[impala] branch master updated: IMPALA-7844: Default to not allow ordinals in HAVING clause

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new ff52064  IMPALA-7844: Default to not allow ordinals in HAVING clause
ff52064 is described below

commit ff52064823695e0cfffc9c9df3e7a1487f7b570a
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Mon May 10 16:45:31 2021 +0800

    IMPALA-7844: Default to not allow ordinals in HAVING clause
    
    Base on the discussion of the previous patch, we decide to not allow
    ordinals in the HAVING clause since 4.0. It's a non-standard feature
    that unintentionally supported by Impala 3.x and earlier versions.
    
    This patch disables it by default, and add a feature flag to turn it on
    for users that do depend on it.
    
    Tests:
     - Modify existing FE tests to test on the flag.
     - Add custom cluster test to verify the flag works.
    
    Change-Id: I0a57b8b65b046fae483e485e8391f8222fa586a5
    Reviewed-on: http://gerrit.cloudera.org:8080/17415
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/util/backend-gflag-util.cc                  |  6 ++++
 common/thrift/BackendGflags.thrift                 |  2 ++
 .../org/apache/impala/analysis/SelectStmt.java     | 37 ++--------------------
 .../org/apache/impala/service/BackendConfig.java   |  8 +++++
 .../apache/impala/analysis/AnalyzeStmtsTest.java   | 12 ++++---
 tests/custom_cluster/test_disable_features.py      | 16 ++++++++++
 6 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc
index 54e4c39..a218c06 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -160,6 +160,11 @@ DEFINE_bool(enable_row_filtering, true,
     "If false, disable the row filtering feature. Defaults to be true. Enabling this flag"
     " requires enable_column_masking to be true.");
 
+DEFINE_bool(allow_ordinals_in_having, false,
+    "If true, allow using ordinals in HAVING clause. This non-standard feature is "
+    "supported in Impala 3.x and earlier. We intend to disable it since 4.0. So it "
+    "defaults to be false. See IMPALA-7844.");
+
 namespace impala {
 
 Status GetConfigFromCommand(const string& flag_cmd, string& result) {
@@ -279,6 +284,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
   cfg.__set_saml2_ee_test_mode(FLAGS_saml2_ee_test_mode);
   cfg.__set_scratch_dirs(FLAGS_scratch_dirs);
   cfg.__set_max_wait_time_for_sync_ddl_s(FLAGS_max_wait_time_for_sync_ddl_s);
+  cfg.__set_allow_ordinals_in_having(FLAGS_allow_ordinals_in_having);
   return Status::OK();
 }
 
diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift
index 7d75c69..2d25efe 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -195,4 +195,6 @@ struct TBackendGflags {
   85: required bool enable_row_filtering
 
   86: required i32 max_wait_time_for_sync_ddl_s
+
+  87: required bool allow_ordinals_in_having
 }
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index ef175eb..e153195 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -19,7 +19,6 @@ package org.apache.impala.analysis;
 
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
@@ -39,6 +38,7 @@ import org.apache.impala.common.Pair;
 import org.apache.impala.common.TableAliasGenerator;
 import org.apache.impala.common.TreeNode;
 import org.apache.impala.rewrite.ExprRewriter;
+import org.apache.impala.service.BackendConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -54,39 +54,6 @@ import com.google.common.collect.Lists;
 public class SelectStmt extends QueryStmt {
   private final static Logger LOG = LoggerFactory.getLogger(SelectStmt.class);
 
-  /**
-   * Impala 3.x and earlier provide non-standard support for a single ordinal
-   * in the HAVING clause:
-   *
-   * SELECT region, count(*), count(*) > 10 FROM sales
-   * HAVING 3
-   *
-   * Most other SQL dialects do not allow ordinals in expressions (such as
-   * HAVING), only in lists (GROUP BY, ORDER BY). The standard way to express
-   * the above is:
-   *
-   * SELECT region, count(*) FROM sales
-   * HAVING count(*) > 10
-   *
-   * Or, better (not currently supported by IMPALA):
-   *
-   * SELECT region, count(*) c FROM SALES
-   * HAVING c > 10
-   *
-   * We intend to disable this non-standard feature in the next major
-   * release. For now, we leave the feature/bug enabled, controlled by
-   * the following constant.
-   *
-   * In that same release, we will enable the third example above.
-   * At present, all that is supported is the odd:
-   *
-   * SELECT region, count(*), count(*) > 10 p FROM sales
-   * HAVING p
-   *
-   * See IMPALA-7844.
-   */
-  public static final boolean ALLOW_ORDINALS_IN_HAVING = true;
-
   /////////////////////////////////////////
   // BEGIN: Members that need to be reset()
 
@@ -739,7 +706,7 @@ public class SelectStmt extends QueryStmt {
       }
       // Resolve (top-level) aliases and analyzes
       havingPred_ = resolveReferenceExpr(havingClause_, "HAVING", analyzer_,
-          ALLOW_ORDINALS_IN_HAVING);
+          BackendConfig.INSTANCE.getAllowOrdinalsInHaving());
       // can't contain analytic exprs
       Expr analyticExpr = havingPred_.findFirstOf(AnalyticExpr.class);
       if (analyticExpr != null) {
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index 9513b9b..4f68669 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -253,6 +253,14 @@ public class BackendConfig {
 
   public String getScratchDirs() { return backendCfg_.scratch_dirs; }
 
+  public boolean getAllowOrdinalsInHaving() {
+    return backendCfg_.allow_ordinals_in_having;
+  }
+
+  public void setAllowOrdinalsInHaving(boolean allow_ordinals_in_having) {
+    backendCfg_.allow_ordinals_in_having = allow_ordinals_in_having;
+  }
+
   // Inits the auth_to_local configuration in the static KerberosName class.
   private static void initAuthToLocal() {
     // If auth_to_local is enabled, we read the configuration hadoop.security.auth_to_local
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 59c0028..12aa6bd 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -37,6 +37,7 @@ import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.service.BackendConfig;
 import org.apache.impala.thrift.TFunctionCategory;
 import org.junit.Assert;
 import org.junit.Test;
@@ -1138,9 +1139,10 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
   // HAVING is an expression, not a list like GROUP BY or ORDER BY.
   // Verify that an integer is treated as a constant, not an ordinal.
   @Test
-  public void TestHavingIntegers() throws AnalysisException {
-    if (SelectStmt.ALLOW_ORDINALS_IN_HAVING) {
+  public void TestHavingIntegers() {
+    try {
       // Legacy 3.x functionality
+      BackendConfig.INSTANCE.setAllowOrdinalsInHaving(true);
       AnalyzesOk("select not bool_col as nb from functional.alltypes having 1");
       AnalysisError("select count(*) from functional.alltypes having 1",
           "HAVING clause 'count(*)' requires return type 'BOOLEAN'. " +
@@ -1151,15 +1153,15 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
           "sum(id) OVER (ORDER BY id ASC)");
       AnalysisError("select sum(id) over(order by id) from functional.alltypes having -1",
           "HAVING: ordinal must be >= 1: -1");
-    } else {
       // Forward-looking, post 3.x functionality
       // IMPALA-7844: Ordinals not supported in HAVING since
       // HAVING is an expression, not a list like GROUP BY or ORDER BY.
+      BackendConfig.INSTANCE.setAllowOrdinalsInHaving(false);
       AnalysisError("select not bool_col as nb from functional.alltypes having 1",
           "HAVING clause '1' requires return type 'BOOLEAN'. " +
           "Actual type is 'TINYINT'.");
       AnalysisError("select not bool_col as nb from functional.alltypes having -1",
-          "HAVING clause '1' requires return type 'BOOLEAN'. " +
+          "HAVING clause '-1' requires return type 'BOOLEAN'. " +
           "Actual type is 'TINYINT'.");
       // Constant exprs should not be interpreted as ordinals
       AnalysisError("select int_col, bool_col, count(*) from functional.alltypes " +
@@ -1170,6 +1172,8 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
           "group by 1, 2 having if(TRUE, 2, int_col)",
           "HAVING clause 'if(TRUE, 2, int_col)' requires return type 'BOOLEAN'. " +
           "Actual type is 'INT'.");
+    } finally {
+      BackendConfig.INSTANCE.setAllowOrdinalsInHaving(false);
     }
   }
 
diff --git a/tests/custom_cluster/test_disable_features.py b/tests/custom_cluster/test_disable_features.py
index a8d2c0b..8d2725c 100644
--- a/tests/custom_cluster/test_disable_features.py
+++ b/tests/custom_cluster/test_disable_features.py
@@ -47,3 +47,19 @@ class TestDisableFeatures(CustomClusterTestSuite):
         use_db=unique_database, multiple_impalad=True)
     self.run_test_case('QueryTest/alter-table-set-column-stats', vector,
         use_db=unique_database, multiple_impalad=True)
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args("--allow_ordinals_in_having=true")
+  def test_allow_ordinals_in_having(self, vector):
+    """Mirror the FE tests in AnalyzeStmtsTest#TestHavingIntegers to make sure the flag
+       can bring back the legacy feature"""
+    self.client.execute("select not bool_col as nb from functional.alltypes having 1")
+    self.execute_query_expect_failure(
+        self.client, "select count(*) from functional.alltypes having 1")
+    self.client.execute("select count(*) > 10 from functional.alltypes having 1")
+    self.execute_query_expect_failure(
+        self.client,
+        "select sum(id) over(order by id) from functional.alltypes having 1")
+    self.execute_query_expect_failure(
+        self.client,
+        "select sum(id) over(order by id) from functional.alltypes having -1")