You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/04/22 05:34:19 UTC

[impala] 02/02: IMPALA-10445: Adjust NDV's scale with query option

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1fb7dbac0d43f3ccbbbbaaf9c41db10d3320fc48
Author: fifteencai <fi...@tencent.com>
AuthorDate: Fri Jan 22 12:40:54 2021 +0800

    IMPALA-10445: Adjust NDV's scale with query option
    
    This is a new way to control NDV's scale.
    
    Since IMPALA-2658, we can trade memory for more accurate
    estimation by setting larger `scale` in SQL function
    NDV(<expr>, <scale>). However the use of larger NDV scale requires
    the modification of SQL queries which may not be practical in certain
    applications:
    
    - Firstly, SQL writers are reluctant to lower that scale. They prone
    to fill up the scale, which will make the cluster unstable. Especially
    when there are `group by`s with high cardinalities. So it is wiser to
    let cluster admin other than sql writer choose appropriate scale.
    
    - Secondly, In some application scenarios, queries are stored in DBs.
    In a BI system, for example, rewriting thousands of SQLs is risky.
    
    In this commit, we introduced a new Query Option `DEFAULT_NDV_SCALE`
    with the following semantics:
    
    1. The allowed value is in the range [1..10];
    2. Previously, the scale used in NDV(<expr>) functions was fixed at 2.
    Now the scale is provided by the newly added query options.
    3. It does not influence the NDV scale for SQL function
    NDV(<expr>, <scale>) in which the NDV scale is provided by the 2nd
    argument <scale>.
    
    We also refactored method `Analyze` to make sure APPX_COUNT_DISTINCT
    can work with this query option. After this, cluster admins can
    substitute `count(distinct <expr>)` with `ndv(<expr>, scale)`.
    
    Implementation details:
    
    - The default value of DEFAULT_NDV_SCALE is 2, so we won't change
    the default ndv behavior.
    - We port `CountDistinctToNdv` transform logic from
    `SelectStmt.analyze()` to `ExprRewriter`, making it compatible with
    further rewrite rules.
    - The newly added rewrite rule `DefaultNdvScaleRule` is applied
    after `CountDistinctToNdvRule`.
    
    Usage:
    
    To set a default ndv scale:
    ```
    SET DEFAULT_NDV_SCALE = 10;
    ```
    
    To unset:
    ```
    SET DEFAULT_NDV_SCALE = 2;
    ```
    
    Here are test results of a typical workload (cardinality=40,090,650):
    +====================================================================+
    |   Metric    | Count Distinct |    NDV2    |    NDV5    |    NDV10  |
    +--------------------------------------------------------------------+
    |  Memory(GB) |       3.83     |    1.84    |    1.85    |     1.89  |
    | Duration(s) |      182.89    |   30.22    |    29.72   |     29.24 |
    |  ErrorRate  |        0%      |    1.8%    |    1.17%   |     0.06% |
    +====================================================================+
    
    Testing:
    1) Added 3 unit test cases in `ExprRewriteRulesTest`.
    2) Added 5 unit test cases in `ExprRewriterTest`.
    3) Ran all front-end unit test, passed.
    4) Added a new query-option test.
    
    Change-Id: I1669858a6e8252e167b464586e8d0b6cb0d0bd50
    Reviewed-on: http://gerrit.cloudera.org:8080/17306
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/service/query-options-test.cc               |  1 +
 be/src/service/query-options.cc                    | 13 ++++
 be/src/service/query-options.h                     |  5 +-
 common/thrift/ImpalaService.thrift                 |  4 +
 common/thrift/Query.thrift                         |  3 +
 .../java/org/apache/impala/analysis/Analyzer.java  |  6 +-
 .../org/apache/impala/analysis/SelectStmt.java     | 31 --------
 .../impala/rewrite/CountDistinctToNdvRule.java     | 66 +++++++++++++++++
 .../apache/impala/rewrite/DefaultNdvScaleRule.java | 73 ++++++++++++++++++
 .../impala/analysis/ExprRewriteRulesTest.java      | 60 +++++++++++++++
 .../apache/impala/analysis/ExprRewriterTest.java   | 86 ++++++++++++++++++++++
 11 files changed, 314 insertions(+), 34 deletions(-)

diff --git a/be/src/service/query-options-test.cc b/be/src/service/query-options-test.cc
index 0759f5c..c6a706f 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -259,6 +259,7 @@ TEST(QueryOptions, SetIntOptions) {
           {MIN_STATEMENT_EXPRESSION_LIMIT, I32_MAX}},
       {MAKE_OPTIONDEF(max_cnf_exprs),                  {-1, I32_MAX}},
       {MAKE_OPTIONDEF(max_fs_writers),                 {0, I32_MAX}},
+      {MAKE_OPTIONDEF(default_ndv_scale),              {1, 10}},
   };
   for (const auto& test_case : case_set) {
     const OptionDef<int32_t>& option_def = test_case.first;
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 35b0610..7fb68a2 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1063,6 +1063,19 @@ Status impala::SetQueryOption(const string& key, const string& value,
         query_options->__set_analytic_rank_pushdown_threshold(val);
         break;
       }
+      case TImpalaQueryOptions::DEFAULT_NDV_SCALE: {
+        StringParser::ParseResult result;
+        const int32_t scale =
+            StringParser::StringToInt<int32_t>(value.c_str(), value.length(), &result);
+        if (value == nullptr || result != StringParser::PARSE_SUCCESS || scale < 1 ||
+            scale > 10) {
+          return Status(
+              Substitute("Invalid NDV scale: '$0'. "
+                         "Only integer value in [1, 10] is allowed.", value));
+        }
+        query_options->__set_default_ndv_scale(scale);
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index c4cda49..f2da309 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::SHOW_COLUMN_MINMAX_STATS + 1);\
+      TImpalaQueryOptions::DEFAULT_NDV_SCALE + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -242,7 +242,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
       TQueryOptionLevel::ADVANCED)\
   QUERY_OPT_FN(show_column_minmax_stats, SHOW_COLUMN_MINMAX_STATS,\
       TQueryOptionLevel::ADVANCED)\
-  ;
+  QUERY_OPT_FN(default_ndv_scale, DEFAULT_NDV_SCALE, TQueryOptionLevel::ADVANCED)
+;
 
 /// Enforce practical limits on some query options to avoid undesired query state.
 static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index e998c46..da079c5 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -652,6 +652,10 @@ enum TImpalaQueryOptions {
 
   // If true, show the min and max stats during show column stats.
   SHOW_COLUMN_MINMAX_STATS = 125
+
+  // Default NDV scale settings, make it easier to change scale in SQL function
+  // NDV(<expr>).
+  DEFAULT_NDV_SCALE = 126
 }
 
 // The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index d2d8e7d..5dd288f 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -484,6 +484,9 @@ struct TQueryOptions {
 
   // See comment in ImpalaService.thrift
   126: optional bool show_column_minmax_stats = false;
+
+  // Default NDV scale
+  127: optional i32 default_ndv_scale = 2;
 }
 
 // Impala currently has three types of sessions: Beeswax, HiveServer2 and external
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 3af6bda..916de22 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -81,6 +81,8 @@ import org.apache.impala.rewrite.NormalizeExprsRule;
 import org.apache.impala.rewrite.SimplifyCastStringToTimestamp;
 import org.apache.impala.rewrite.SimplifyConditionalsRule;
 import org.apache.impala.rewrite.SimplifyDistinctFromRule;
+import org.apache.impala.rewrite.CountDistinctToNdvRule;
+import org.apache.impala.rewrite.DefaultNdvScaleRule;
 import org.apache.impala.service.FeSupport;
 import org.apache.impala.thrift.TAccessEvent;
 import org.apache.impala.thrift.TCatalogObjectType;
@@ -282,7 +284,7 @@ public class Analyzer {
   /**
    * Check if the table supports the operation
    * @param table Table need to check
-   * @param operationType The type of operation
+   * @param type The type of operation
    * @throws AnalysisException If the table does not support the operation
    */
   public static void checkTableCapability(FeTable table, OperationType type)
@@ -526,6 +528,8 @@ public class Analyzer {
         rules.add(NormalizeCountStarRule.INSTANCE);
         rules.add(SimplifyDistinctFromRule.INSTANCE);
         rules.add(SimplifyCastStringToTimestamp.INSTANCE);
+        rules.add(CountDistinctToNdvRule.INSTANCE);
+        rules.add(DefaultNdvScaleRule.INSTANCE);
       }
       exprRewriter_ = new ExprRewriter(rules);
     }
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 34232bc..8acdcb6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -300,7 +300,6 @@ public class SelectStmt extends QueryStmt {
     private final Analyzer analyzer_;
     private List<Expr> groupingExprsCopy_;
     private List<FunctionCallExpr> aggExprs_;
-    private ExprSubstitutionMap ndvSmap_;
     private ExprSubstitutionMap countAllMap_;
 
     private SelectAnalyzer(Analyzer analyzer) {
@@ -326,7 +325,6 @@ public class SelectStmt extends QueryStmt {
         verifyAggSemantics();
         analyzeGroupingExprs();
         collectAggExprs();
-        rewriteCountDistinct();
         buildAggregateExprs();
         buildResultExprs();
         verifyAggregation();
@@ -859,34 +857,6 @@ public class SelectStmt extends QueryStmt {
       }
     }
 
-    private void rewriteCountDistinct() {
-      // Optionally rewrite all count(distinct <expr>) into equivalent NDV() calls.
-      if (!analyzer_.getQueryCtx().client_request.query_options.appx_count_distinct) {
-        return;
-      }
-      ndvSmap_ = new ExprSubstitutionMap();
-      for (FunctionCallExpr aggExpr: aggExprs_) {
-        if (!aggExpr.isDistinct()
-            || !aggExpr.getFnName().getFunction().equals("count")
-            || aggExpr.getParams().size() != 1) {
-          continue;
-        }
-        FunctionCallExpr ndvFnCall =
-            new FunctionCallExpr("ndv", aggExpr.getParams().exprs());
-        ndvFnCall.analyzeNoThrow(analyzer_);
-        Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType()));
-        ndvSmap_.put(aggExpr, ndvFnCall);
-      }
-      // Replace all count(distinct <expr>) with NDV(<expr>).
-      List<Expr> substAggExprs = Expr.substituteList(aggExprs_,
-          ndvSmap_, analyzer_, false);
-      aggExprs_.clear();
-      for (Expr aggExpr: substAggExprs) {
-        Preconditions.checkState(aggExpr instanceof FunctionCallExpr);
-        aggExprs_.add((FunctionCallExpr) aggExpr);
-      }
-    }
-
     private void buildAggregateExprs() throws AnalysisException {
       // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
       // evaluated in two phases (see AggregateInfo for more details). In particular,
@@ -900,7 +870,6 @@ public class SelectStmt extends QueryStmt {
       // i) There is no GROUP-BY clause, and
       // ii) Other DISTINCT aggregates are present.
       countAllMap_ = createCountAllMap();
-      countAllMap_ = ExprSubstitutionMap.compose(ndvSmap_, countAllMap_, analyzer_);
       List<Expr> substitutedAggs =
           Expr.substituteList(aggExprs_, countAllMap_, analyzer_, false);
       aggExprs_.clear();
diff --git a/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java b/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java
new file mode 100644
index 0000000..5642062
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.rewrite;
+
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.FunctionCallExpr;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.analysis.Analyzer;
+
+/*
+ * Rewrite rule to replace count distinct to ndv
+ *
+ */
+public class CountDistinctToNdvRule implements ExprRewriteRule{
+
+    // Singleton
+    public static CountDistinctToNdvRule INSTANCE = new CountDistinctToNdvRule();
+
+    /*
+     * This is an implementation of IMPALA-110.
+     * Replace count distinct operators to NDVs if APPX_COUNT_DISTINCT is set.
+     * Will traverse the expression tree and return modified expr.
+     *
+     * @param expr expr to be checked
+     * @param analyzer query analyzer
+     */
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+        // Short-circuit rules
+        if (!analyzer.getQueryCtx().client_request.query_options.appx_count_distinct) {
+            return expr;
+        }
+        if (!(expr instanceof FunctionCallExpr) || !expr.isAnalyzed()) return expr;
+
+        // Returns if expr is not count(distinct), or it has more 1 param.
+        FunctionCallExpr oldFunctionCallExpr = (FunctionCallExpr) expr;
+        if (!oldFunctionCallExpr.getFnName().getFunction().equals("count")
+                || !oldFunctionCallExpr.isDistinct()
+                || oldFunctionCallExpr.getParams().exprs().size() != 1 ) {
+            return expr;
+        }
+
+        // Create a new function `ndv(<expr>)` to substitute the `count(distinct <expr>)`
+        FunctionCallExpr ndvFunc = new FunctionCallExpr("ndv",
+                oldFunctionCallExpr.getParams().exprs());
+
+        // Analyze the newly added FunctionCall, otherwise follow-up rules won't fire.
+        ndvFunc.analyze(analyzer);
+        return ndvFunc;
+    }
+}
diff --git a/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java b/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java
new file mode 100644
index 0000000..40aac66
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.rewrite;
+
+import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.FunctionCallExpr;
+import org.apache.impala.analysis.NumericLiteral;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+
+import java.math.BigDecimal;
+import java.util.List;
+
+/**
+ * Rewrite rule to set NDV call's default scale.
+ *
+ * If DEFAULT_NDV_SCALE query option is set. This rule will rewrite the NDV calls
+ * to Scaled NDV calls. Note this rule works with APPX_COUNT_DISTINCT option. If
+ * APPX_COUNT_DISTINCT is set. Any count(distinct) call will be firstly converted
+ * to NDV, and then converted to NDV(scale).
+ */
+public class DefaultNdvScaleRule implements ExprRewriteRule{
+    // Singleton
+    public static DefaultNdvScaleRule INSTANCE = new DefaultNdvScaleRule();
+
+    /**
+     * Applies this rewrite rule to the given analyzed Expr. Returns the transformed and
+     * analyzed Expr or the original unmodified Expr if no changes were made. If any
+     * changes were made, the new Expr is guaranteed to be a different Expr object,
+     * so callers can rely on object reference comparison for change detection.
+     *
+     * @param expr expr to be checked
+     * @param analyzer default analyzer
+     */
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+        // Short-circuit rules
+        if (!(expr instanceof FunctionCallExpr) || !expr.isAnalyzed()) return expr;
+        int scale = analyzer.getQueryOptions().getDefault_ndv_scale();
+        if (scale > 10 || scale < 1 || scale == 2) return expr;
+
+        // Returns if expr is not ndv, or it is scaled ndv already.
+        FunctionCallExpr oldFunctionCallExpr = (FunctionCallExpr) expr;
+        if (!oldFunctionCallExpr.getFnName().getFunction().equals("ndv")
+                || oldFunctionCallExpr.getParams().exprs().size() > 1) {
+            return expr;
+        }
+
+        // Do substitution
+        List<Expr> params = oldFunctionCallExpr.getParams().exprs();
+        params.add(new NumericLiteral(BigDecimal.valueOf(scale), Type.INT));
+        FunctionCallExpr functionCall =  new FunctionCallExpr("ndv", params);
+        functionCall.analyze(analyzer);
+
+        return functionCall;
+    }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 24dce45..d91a53c 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -177,6 +177,19 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     return RewritesOk(tableName, exprStr, Lists.newArrayList(rule), expectedExprStr);
   }
 
+  /**
+   * Given a list of `rule`, this function checks whether the rewritten <expr> is as
+   * expected.
+   *
+   * If no rule in `rules` is expected to be fired, callers should set expectedExprStr
+   * to null or "NULL". Otherwise, this function would throw an exception like
+   * "Rule xxx didn't fire"
+   *
+   * @param exprStr: origin expr
+   * @param rules: list of rewrite rules
+   * @param expectedExprStr: expected expr
+   * @return: Expr: rewritten expr
+   */
   public Expr RewritesOk(String exprStr, List<ExprRewriteRule> rules,
       String expectedExprStr)
       throws ImpalaException {
@@ -774,6 +787,53 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
   }
 
   @Test
+  public void testCountDistinctToNdvRule() throws ImpalaException {
+    List<ExprRewriteRule> rules = Lists.newArrayList(
+            org.apache.impala.rewrite.CountDistinctToNdvRule.INSTANCE
+    );
+    session.options().setAppx_count_distinct(true);
+    RewritesOk("count(distinct bool_col)", rules, "ndv(bool_col)");
+  }
+
+  @Test
+  public void testDefaultNdvScaleRule() throws ImpalaException {
+    List<ExprRewriteRule> rules = Lists.newArrayList(
+            org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+    );
+    session.options().setDefault_ndv_scale(10);
+    RewritesOk("ndv(bool_col)", rules, "ndv(bool_col, 10)");
+  }
+
+  @Test
+  public void testDefaultNdvScaleRuleNotSet() throws ImpalaException {
+    List<ExprRewriteRule> rules = Lists.newArrayList(
+            org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+    );
+    RewritesOk("ndv(bool_col)", rules, null);
+  }
+
+  @Test
+  public void testDefaultNdvScaleRuleSetDefault() throws ImpalaException {
+    List<ExprRewriteRule> rules = Lists.newArrayList(
+            org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+    );
+    session.options().setDefault_ndv_scale(2);
+    RewritesOk("ndv(bool_col)", rules, null);
+  }
+
+
+  @Test
+  public void testCountDistinctToNdvAndDefaultNdvScaleRule() throws ImpalaException {
+    List<ExprRewriteRule> rules = Lists.newArrayList(
+            org.apache.impala.rewrite.CountDistinctToNdvRule.INSTANCE,
+            org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+    );
+    session.options().setAppx_count_distinct(true);
+    session.options().setDefault_ndv_scale(10);
+    RewritesOk("count(distinct bool_col)", rules, "ndv(bool_col, 10)");
+  }
+
+  @Test
   public void testSimplifyCastStringToTimestamp() throws ImpalaException {
     ExprRewriteRule rule = SimplifyCastStringToTimestamp.INSTANCE;
 
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index f455198..71a5f4d 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -500,4 +500,90 @@ public class ExprRewriterTest extends AnalyzerTest {
     Assert.assertEquals("Bad sql with implicit casts from original query:\n" + query,
         expectedToSqlWithImplicitCasts, actual);
   }
+
+  @Test
+  public void TestToSqlWithAppxCountDistinctAndDefaultNdvs() {
+    TQueryOptions options = new TQueryOptions();
+    options.setEnable_expr_rewrites(true);
+
+    AnalysisContext ctx = createAnalysisCtx(options);
+
+    //----------------------
+    // Test query rewrites.
+    //----------------------
+    String countDistinctSql = "SELECT count(DISTINCT id) FROM functional.alltypes";
+
+    // No rewrite
+    assertToSql(ctx, countDistinctSql, countDistinctSql, countDistinctSql);
+
+    // Rewrite to ndv
+    options.setAppx_count_distinct(true);
+    assertToSql(createAnalysisCtx(options), countDistinctSql, countDistinctSql,
+            "SELECT ndv(id) FROM functional.alltypes");
+
+    // Rewrite to ndv(10)
+    options.setDefault_ndv_scale(10);
+    assertToSql(createAnalysisCtx(options), countDistinctSql, countDistinctSql,
+            "SELECT ndv(id, 10) FROM functional.alltypes");
+
+    String ndvSql = "SELECT ndv(id) FROM functional.alltypes";
+
+    // No rewrite
+    options.setDefault_ndv_scale(2);
+    assertToSql(createAnalysisCtx(options), ndvSql, ndvSql, ndvSql);
+
+    // Rewrite ndv scale
+    options.setDefault_ndv_scale(9);
+    assertToSql(createAnalysisCtx(options), ndvSql, ndvSql,
+            "SELECT ndv(id, 9) FROM functional.alltypes");
+
+    //-----------------------------------------------------------------------------------
+    // Test complex sql which has all 3 types of functions.
+    // NDV(<expr>) | NDV(<expr>, <scale>) | COUNT(DISTINCT <expr>)
+    //
+    // For coverage, we have these scenarios:
+    // CASE 1: DEFAULT_NDV_SCALE=5(same as the value in original sql),
+    //         APPX_COUNT_DISTINCT=True
+    // CASE 2: DEFAULT_NDV_SCALE=5, APPX_COUNT_DISTINCT=False
+    // CASE 3: DEFAULT_NDV_SCALE=9(different with original), APPX_COUNT_DISTINCT=True
+    // CASE 4: DEFAULT_NDV_SCALE=3, APPX_COUNT_DISTINCT=False
+    // CASE 5: DEFAULT_NDV_SCALE=2, APPX_COUNT_DISTINCT=True
+    // CASE 6: DEFAULT_NDV_SCALE=2, APPX_COUNT_DISTINCT=False
+    //-----------------------------------------------------------------------------------
+    String sql1 = "SELECT ndv(id), ndv(id, 5), count(DISTINCT id) FROM " +
+            "functional.alltypes";
+
+    // CASE 1
+    options.setDefault_ndv_scale(5).setAppx_count_distinct(true);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id, 5), ndv(id, 5), ndv(id, 5) FROM functional.alltypes");
+
+    // CASE 2
+    options.setDefault_ndv_scale(5).setAppx_count_distinct(false);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id, 5), ndv(id, 5), count(DISTINCT id)" +
+                    " FROM functional.alltypes");
+
+    // CASE 3
+    options.setDefault_ndv_scale(9).setAppx_count_distinct(true);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id, 9), ndv(id, 5), ndv(id, 9) FROM functional.alltypes");
+
+    // CASE 4
+    options.setDefault_ndv_scale(3).setAppx_count_distinct(false);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id, 3), ndv(id, 5), count(DISTINCT id) " +
+                    "FROM functional.alltypes");
+
+    // CASE 5
+    options.setDefault_ndv_scale(2).setAppx_count_distinct(true);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id), ndv(id, 5), ndv(id) FROM functional.alltypes");
+
+    // CASE 6
+    options.setDefault_ndv_scale(2).setAppx_count_distinct(false);
+    assertToSql(createAnalysisCtx(options), sql1, sql1,
+            "SELECT ndv(id), ndv(id, 5), count(DISTINCT id) FROM functional.alltypes");
+
+  }
 }