You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/04/22 05:34:19 UTC
[impala] 02/02: IMPALA-10445: Adjust NDV's scale with query option
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1fb7dbac0d43f3ccbbbbaaf9c41db10d3320fc48
Author: fifteencai <fi...@tencent.com>
AuthorDate: Fri Jan 22 12:40:54 2021 +0800
IMPALA-10445: Adjust NDV's scale with query option
This is a new way to control NDV's scale.
Since IMPALA-2658, we can trade memory for more accurate
estimation by setting larger `scale` in SQL function
NDV(<expr>, <scale>). However the use of larger NDV scale requires
the modification of SQL queries which may not be practical in certain
applications:
- Firstly, SQL writers are reluctant to lower that scale. They prone
to fill up the scale, which will make the cluster unstable. Especially
when there are `group by`s with high cardinalities. So it is wiser to
let cluster admin other than sql writer choose appropriate scale.
- Secondly, In some application scenarios, queries are stored in DBs.
In a BI system, for example, rewriting thousands of SQLs is risky.
In this commit, we introduced a new Query Option `DEFAULT_NDV_SCALE`
with the following semantics:
1. The allowed value is in the range [1..10];
2. Previously, the scale used in NDV(<expr>) functions was fixed at 2.
Now the scale is provided by the newly added query options.
3. It does not influence the NDV scale for SQL function
NDV(<expr>, <scale>) in which the NDV scale is provided by the 2nd
argument <scale>.
We also refactored method `Analyze` to make sure APPX_COUNT_DISTINCT
can work with this query option. After this, cluster admins can
substitute `count(distinct <expr>)` with `ndv(<expr>, scale)`.
Implementation details:
- The default value of DEFAULT_NDV_SCALE is 2, so we won't change
the default ndv behavior.
- We port `CountDistinctToNdv` transform logic from
`SelectStmt.analyze()` to `ExprRewriter`, making it compatible with
further rewrite rules.
- The newly added rewrite rule `DefaultNdvScaleRule` is applied
after `CountDistinctToNdvRule`.
Usage:
To set a default ndv scale:
```
SET DEFAULT_NDV_SCALE = 10;
```
To unset:
```
SET DEFAULT_NDV_SCALE = 2;
```
Here are test results of a typical workload (cardinality=40,090,650):
+====================================================================+
| Metric | Count Distinct | NDV2 | NDV5 | NDV10 |
+--------------------------------------------------------------------+
| Memory(GB) | 3.83 | 1.84 | 1.85 | 1.89 |
| Duration(s) | 182.89 | 30.22 | 29.72 | 29.24 |
| ErrorRate | 0% | 1.8% | 1.17% | 0.06% |
+====================================================================+
Testing:
1) Added 3 unit test cases in `ExprRewriteRulesTest`.
2) Added 5 unit test cases in `ExprRewriterTest`.
3) Ran all front-end unit test, passed.
4) Added a new query-option test.
Change-Id: I1669858a6e8252e167b464586e8d0b6cb0d0bd50
Reviewed-on: http://gerrit.cloudera.org:8080/17306
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/service/query-options-test.cc | 1 +
be/src/service/query-options.cc | 13 ++++
be/src/service/query-options.h | 5 +-
common/thrift/ImpalaService.thrift | 4 +
common/thrift/Query.thrift | 3 +
.../java/org/apache/impala/analysis/Analyzer.java | 6 +-
.../org/apache/impala/analysis/SelectStmt.java | 31 --------
.../impala/rewrite/CountDistinctToNdvRule.java | 66 +++++++++++++++++
.../apache/impala/rewrite/DefaultNdvScaleRule.java | 73 ++++++++++++++++++
.../impala/analysis/ExprRewriteRulesTest.java | 60 +++++++++++++++
.../apache/impala/analysis/ExprRewriterTest.java | 86 ++++++++++++++++++++++
11 files changed, 314 insertions(+), 34 deletions(-)
diff --git a/be/src/service/query-options-test.cc b/be/src/service/query-options-test.cc
index 0759f5c..c6a706f 100644
--- a/be/src/service/query-options-test.cc
+++ b/be/src/service/query-options-test.cc
@@ -259,6 +259,7 @@ TEST(QueryOptions, SetIntOptions) {
{MIN_STATEMENT_EXPRESSION_LIMIT, I32_MAX}},
{MAKE_OPTIONDEF(max_cnf_exprs), {-1, I32_MAX}},
{MAKE_OPTIONDEF(max_fs_writers), {0, I32_MAX}},
+ {MAKE_OPTIONDEF(default_ndv_scale), {1, 10}},
};
for (const auto& test_case : case_set) {
const OptionDef<int32_t>& option_def = test_case.first;
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 35b0610..7fb68a2 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1063,6 +1063,19 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_analytic_rank_pushdown_threshold(val);
break;
}
+ case TImpalaQueryOptions::DEFAULT_NDV_SCALE: {
+ StringParser::ParseResult result;
+ const int32_t scale =
+ StringParser::StringToInt<int32_t>(value.c_str(), value.length(), &result);
+ if (value == nullptr || result != StringParser::PARSE_SUCCESS || scale < 1 ||
+ scale > 10) {
+ return Status(
+ Substitute("Invalid NDV scale: '$0'. "
+ "Only integer value in [1, 10] is allowed.", value));
+ }
+ query_options->__set_default_ndv_scale(scale);
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index c4cda49..f2da309 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
- TImpalaQueryOptions::SHOW_COLUMN_MINMAX_STATS + 1);\
+ TImpalaQueryOptions::DEFAULT_NDV_SCALE + 1);\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -242,7 +242,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
TQueryOptionLevel::ADVANCED)\
QUERY_OPT_FN(show_column_minmax_stats, SHOW_COLUMN_MINMAX_STATS,\
TQueryOptionLevel::ADVANCED)\
- ;
+ QUERY_OPT_FN(default_ndv_scale, DEFAULT_NDV_SCALE, TQueryOptionLevel::ADVANCED)
+;
/// Enforce practical limits on some query options to avoid undesired query state.
static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index e998c46..da079c5 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -652,6 +652,10 @@ enum TImpalaQueryOptions {
// If true, show the min and max stats during show column stats.
SHOW_COLUMN_MINMAX_STATS = 125
+
+ // Default NDV scale settings, make it easier to change scale in SQL function
+ // NDV(<expr>).
+ DEFAULT_NDV_SCALE = 126
}
// The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index d2d8e7d..5dd288f 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -484,6 +484,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
126: optional bool show_column_minmax_stats = false;
+
+ // Default NDV scale
+ 127: optional i32 default_ndv_scale = 2;
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and external
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 3af6bda..916de22 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -81,6 +81,8 @@ import org.apache.impala.rewrite.NormalizeExprsRule;
import org.apache.impala.rewrite.SimplifyCastStringToTimestamp;
import org.apache.impala.rewrite.SimplifyConditionalsRule;
import org.apache.impala.rewrite.SimplifyDistinctFromRule;
+import org.apache.impala.rewrite.CountDistinctToNdvRule;
+import org.apache.impala.rewrite.DefaultNdvScaleRule;
import org.apache.impala.service.FeSupport;
import org.apache.impala.thrift.TAccessEvent;
import org.apache.impala.thrift.TCatalogObjectType;
@@ -282,7 +284,7 @@ public class Analyzer {
/**
* Check if the table supports the operation
* @param table Table need to check
- * @param operationType The type of operation
+ * @param type The type of operation
* @throws AnalysisException If the table does not support the operation
*/
public static void checkTableCapability(FeTable table, OperationType type)
@@ -526,6 +528,8 @@ public class Analyzer {
rules.add(NormalizeCountStarRule.INSTANCE);
rules.add(SimplifyDistinctFromRule.INSTANCE);
rules.add(SimplifyCastStringToTimestamp.INSTANCE);
+ rules.add(CountDistinctToNdvRule.INSTANCE);
+ rules.add(DefaultNdvScaleRule.INSTANCE);
}
exprRewriter_ = new ExprRewriter(rules);
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 34232bc..8acdcb6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -300,7 +300,6 @@ public class SelectStmt extends QueryStmt {
private final Analyzer analyzer_;
private List<Expr> groupingExprsCopy_;
private List<FunctionCallExpr> aggExprs_;
- private ExprSubstitutionMap ndvSmap_;
private ExprSubstitutionMap countAllMap_;
private SelectAnalyzer(Analyzer analyzer) {
@@ -326,7 +325,6 @@ public class SelectStmt extends QueryStmt {
verifyAggSemantics();
analyzeGroupingExprs();
collectAggExprs();
- rewriteCountDistinct();
buildAggregateExprs();
buildResultExprs();
verifyAggregation();
@@ -859,34 +857,6 @@ public class SelectStmt extends QueryStmt {
}
}
- private void rewriteCountDistinct() {
- // Optionally rewrite all count(distinct <expr>) into equivalent NDV() calls.
- if (!analyzer_.getQueryCtx().client_request.query_options.appx_count_distinct) {
- return;
- }
- ndvSmap_ = new ExprSubstitutionMap();
- for (FunctionCallExpr aggExpr: aggExprs_) {
- if (!aggExpr.isDistinct()
- || !aggExpr.getFnName().getFunction().equals("count")
- || aggExpr.getParams().size() != 1) {
- continue;
- }
- FunctionCallExpr ndvFnCall =
- new FunctionCallExpr("ndv", aggExpr.getParams().exprs());
- ndvFnCall.analyzeNoThrow(analyzer_);
- Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType()));
- ndvSmap_.put(aggExpr, ndvFnCall);
- }
- // Replace all count(distinct <expr>) with NDV(<expr>).
- List<Expr> substAggExprs = Expr.substituteList(aggExprs_,
- ndvSmap_, analyzer_, false);
- aggExprs_.clear();
- for (Expr aggExpr: substAggExprs) {
- Preconditions.checkState(aggExpr instanceof FunctionCallExpr);
- aggExprs_.add((FunctionCallExpr) aggExpr);
- }
- }
-
private void buildAggregateExprs() throws AnalysisException {
// When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
// evaluated in two phases (see AggregateInfo for more details). In particular,
@@ -900,7 +870,6 @@ public class SelectStmt extends QueryStmt {
// i) There is no GROUP-BY clause, and
// ii) Other DISTINCT aggregates are present.
countAllMap_ = createCountAllMap();
- countAllMap_ = ExprSubstitutionMap.compose(ndvSmap_, countAllMap_, analyzer_);
List<Expr> substitutedAggs =
Expr.substituteList(aggExprs_, countAllMap_, analyzer_, false);
aggExprs_.clear();
diff --git a/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java b/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java
new file mode 100644
index 0000000..5642062
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/rewrite/CountDistinctToNdvRule.java
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.rewrite;
+
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.FunctionCallExpr;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.analysis.Analyzer;
+
+/*
+ * Rewrite rule to replace count distinct to ndv
+ *
+ */
+public class CountDistinctToNdvRule implements ExprRewriteRule{
+
+ // Singleton
+ public static CountDistinctToNdvRule INSTANCE = new CountDistinctToNdvRule();
+
+ /*
+ * This is an implementation of IMPALA-110.
+ * Replace count distinct operators to NDVs if APPX_COUNT_DISTINCT is set.
+ * Will traverse the expression tree and return modified expr.
+ *
+ * @param expr expr to be checked
+ * @param analyzer query analyzer
+ */
+ @Override
+ public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+ // Short-circuit rules
+ if (!analyzer.getQueryCtx().client_request.query_options.appx_count_distinct) {
+ return expr;
+ }
+ if (!(expr instanceof FunctionCallExpr) || !expr.isAnalyzed()) return expr;
+
+ // Returns if expr is not count(distinct), or it has more 1 param.
+ FunctionCallExpr oldFunctionCallExpr = (FunctionCallExpr) expr;
+ if (!oldFunctionCallExpr.getFnName().getFunction().equals("count")
+ || !oldFunctionCallExpr.isDistinct()
+ || oldFunctionCallExpr.getParams().exprs().size() != 1 ) {
+ return expr;
+ }
+
+ // Create a new function `ndv(<expr>)` to substitute the `count(distinct <expr>)`
+ FunctionCallExpr ndvFunc = new FunctionCallExpr("ndv",
+ oldFunctionCallExpr.getParams().exprs());
+
+ // Analyze the newly added FunctionCall, otherwise follow-up rules won't fire.
+ ndvFunc.analyze(analyzer);
+ return ndvFunc;
+ }
+}
diff --git a/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java b/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java
new file mode 100644
index 0000000..40aac66
--- /dev/null
+++ b/fe/src/main/java/org/apache/impala/rewrite/DefaultNdvScaleRule.java
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.rewrite;
+
+import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.FunctionCallExpr;
+import org.apache.impala.analysis.NumericLiteral;
+import org.apache.impala.catalog.Type;
+import org.apache.impala.common.AnalysisException;
+
+import java.math.BigDecimal;
+import java.util.List;
+
+/**
+ * Rewrite rule to set NDV call's default scale.
+ *
+ * If DEFAULT_NDV_SCALE query option is set. This rule will rewrite the NDV calls
+ * to Scaled NDV calls. Note this rule works with APPX_COUNT_DISTINCT option. If
+ * APPX_COUNT_DISTINCT is set. Any count(distinct) call will be firstly converted
+ * to NDV, and then converted to NDV(scale).
+ */
+public class DefaultNdvScaleRule implements ExprRewriteRule{
+ // Singleton
+ public static DefaultNdvScaleRule INSTANCE = new DefaultNdvScaleRule();
+
+ /**
+ * Applies this rewrite rule to the given analyzed Expr. Returns the transformed and
+ * analyzed Expr or the original unmodified Expr if no changes were made. If any
+ * changes were made, the new Expr is guaranteed to be a different Expr object,
+ * so callers can rely on object reference comparison for change detection.
+ *
+ * @param expr expr to be checked
+ * @param analyzer default analyzer
+ */
+ @Override
+ public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+ // Short-circuit rules
+ if (!(expr instanceof FunctionCallExpr) || !expr.isAnalyzed()) return expr;
+ int scale = analyzer.getQueryOptions().getDefault_ndv_scale();
+ if (scale > 10 || scale < 1 || scale == 2) return expr;
+
+ // Returns if expr is not ndv, or it is scaled ndv already.
+ FunctionCallExpr oldFunctionCallExpr = (FunctionCallExpr) expr;
+ if (!oldFunctionCallExpr.getFnName().getFunction().equals("ndv")
+ || oldFunctionCallExpr.getParams().exprs().size() > 1) {
+ return expr;
+ }
+
+ // Do substitution
+ List<Expr> params = oldFunctionCallExpr.getParams().exprs();
+ params.add(new NumericLiteral(BigDecimal.valueOf(scale), Type.INT));
+ FunctionCallExpr functionCall = new FunctionCallExpr("ndv", params);
+ functionCall.analyze(analyzer);
+
+ return functionCall;
+ }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 24dce45..d91a53c 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -177,6 +177,19 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
return RewritesOk(tableName, exprStr, Lists.newArrayList(rule), expectedExprStr);
}
+ /**
+ * Given a list of `rule`, this function checks whether the rewritten <expr> is as
+ * expected.
+ *
+ * If no rule in `rules` is expected to be fired, callers should set expectedExprStr
+ * to null or "NULL". Otherwise, this function would throw an exception like
+ * "Rule xxx didn't fire"
+ *
+ * @param exprStr: origin expr
+ * @param rules: list of rewrite rules
+ * @param expectedExprStr: expected expr
+ * @return: Expr: rewritten expr
+ */
public Expr RewritesOk(String exprStr, List<ExprRewriteRule> rules,
String expectedExprStr)
throws ImpalaException {
@@ -774,6 +787,53 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
}
@Test
+ public void testCountDistinctToNdvRule() throws ImpalaException {
+ List<ExprRewriteRule> rules = Lists.newArrayList(
+ org.apache.impala.rewrite.CountDistinctToNdvRule.INSTANCE
+ );
+ session.options().setAppx_count_distinct(true);
+ RewritesOk("count(distinct bool_col)", rules, "ndv(bool_col)");
+ }
+
+ @Test
+ public void testDefaultNdvScaleRule() throws ImpalaException {
+ List<ExprRewriteRule> rules = Lists.newArrayList(
+ org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+ );
+ session.options().setDefault_ndv_scale(10);
+ RewritesOk("ndv(bool_col)", rules, "ndv(bool_col, 10)");
+ }
+
+ @Test
+ public void testDefaultNdvScaleRuleNotSet() throws ImpalaException {
+ List<ExprRewriteRule> rules = Lists.newArrayList(
+ org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+ );
+ RewritesOk("ndv(bool_col)", rules, null);
+ }
+
+ @Test
+ public void testDefaultNdvScaleRuleSetDefault() throws ImpalaException {
+ List<ExprRewriteRule> rules = Lists.newArrayList(
+ org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+ );
+ session.options().setDefault_ndv_scale(2);
+ RewritesOk("ndv(bool_col)", rules, null);
+ }
+
+
+ @Test
+ public void testCountDistinctToNdvAndDefaultNdvScaleRule() throws ImpalaException {
+ List<ExprRewriteRule> rules = Lists.newArrayList(
+ org.apache.impala.rewrite.CountDistinctToNdvRule.INSTANCE,
+ org.apache.impala.rewrite.DefaultNdvScaleRule.INSTANCE
+ );
+ session.options().setAppx_count_distinct(true);
+ session.options().setDefault_ndv_scale(10);
+ RewritesOk("count(distinct bool_col)", rules, "ndv(bool_col, 10)");
+ }
+
+ @Test
public void testSimplifyCastStringToTimestamp() throws ImpalaException {
ExprRewriteRule rule = SimplifyCastStringToTimestamp.INSTANCE;
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index f455198..71a5f4d 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -500,4 +500,90 @@ public class ExprRewriterTest extends AnalyzerTest {
Assert.assertEquals("Bad sql with implicit casts from original query:\n" + query,
expectedToSqlWithImplicitCasts, actual);
}
+
+ @Test
+ public void TestToSqlWithAppxCountDistinctAndDefaultNdvs() {
+ TQueryOptions options = new TQueryOptions();
+ options.setEnable_expr_rewrites(true);
+
+ AnalysisContext ctx = createAnalysisCtx(options);
+
+ //----------------------
+ // Test query rewrites.
+ //----------------------
+ String countDistinctSql = "SELECT count(DISTINCT id) FROM functional.alltypes";
+
+ // No rewrite
+ assertToSql(ctx, countDistinctSql, countDistinctSql, countDistinctSql);
+
+ // Rewrite to ndv
+ options.setAppx_count_distinct(true);
+ assertToSql(createAnalysisCtx(options), countDistinctSql, countDistinctSql,
+ "SELECT ndv(id) FROM functional.alltypes");
+
+ // Rewrite to ndv(10)
+ options.setDefault_ndv_scale(10);
+ assertToSql(createAnalysisCtx(options), countDistinctSql, countDistinctSql,
+ "SELECT ndv(id, 10) FROM functional.alltypes");
+
+ String ndvSql = "SELECT ndv(id) FROM functional.alltypes";
+
+ // No rewrite
+ options.setDefault_ndv_scale(2);
+ assertToSql(createAnalysisCtx(options), ndvSql, ndvSql, ndvSql);
+
+ // Rewrite ndv scale
+ options.setDefault_ndv_scale(9);
+ assertToSql(createAnalysisCtx(options), ndvSql, ndvSql,
+ "SELECT ndv(id, 9) FROM functional.alltypes");
+
+ //-----------------------------------------------------------------------------------
+ // Test complex sql which has all 3 types of functions.
+ // NDV(<expr>) | NDV(<expr>, <scale>) | COUNT(DISTINCT <expr>)
+ //
+ // For coverage, we have these scenarios:
+ // CASE 1: DEFAULT_NDV_SCALE=5(same as the value in original sql),
+ // APPX_COUNT_DISTINCT=True
+ // CASE 2: DEFAULT_NDV_SCALE=5, APPX_COUNT_DISTINCT=False
+ // CASE 3: DEFAULT_NDV_SCALE=9(different with original), APPX_COUNT_DISTINCT=True
+ // CASE 4: DEFAULT_NDV_SCALE=3, APPX_COUNT_DISTINCT=False
+ // CASE 5: DEFAULT_NDV_SCALE=2, APPX_COUNT_DISTINCT=True
+ // CASE 6: DEFAULT_NDV_SCALE=2, APPX_COUNT_DISTINCT=False
+ //-----------------------------------------------------------------------------------
+ String sql1 = "SELECT ndv(id), ndv(id, 5), count(DISTINCT id) FROM " +
+ "functional.alltypes";
+
+ // CASE 1
+ options.setDefault_ndv_scale(5).setAppx_count_distinct(true);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id, 5), ndv(id, 5), ndv(id, 5) FROM functional.alltypes");
+
+ // CASE 2
+ options.setDefault_ndv_scale(5).setAppx_count_distinct(false);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id, 5), ndv(id, 5), count(DISTINCT id)" +
+ " FROM functional.alltypes");
+
+ // CASE 3
+ options.setDefault_ndv_scale(9).setAppx_count_distinct(true);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id, 9), ndv(id, 5), ndv(id, 9) FROM functional.alltypes");
+
+ // CASE 4
+ options.setDefault_ndv_scale(3).setAppx_count_distinct(false);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id, 3), ndv(id, 5), count(DISTINCT id) " +
+ "FROM functional.alltypes");
+
+ // CASE 5
+ options.setDefault_ndv_scale(2).setAppx_count_distinct(true);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id), ndv(id, 5), ndv(id) FROM functional.alltypes");
+
+ // CASE 6
+ options.setDefault_ndv_scale(2).setAppx_count_distinct(false);
+ assertToSql(createAnalysisCtx(options), sql1, sql1,
+ "SELECT ndv(id), ndv(id, 5), count(DISTINCT id) FROM functional.alltypes");
+
+ }
}