You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2018/10/17 17:03:01 UTC

[06/11] impala git commit: IMPALA-7702: enable fetch incremental stats by default

IMPALA-7702: enable fetch incremental stats by default

Flips the default from always off to always on for
--pull_incremental_statistics. With this setting, the default
is for coordinators to fetch incremental stats from catalogd
directly (only when computing incremental stats) instead of
receiving it from the statestore broadcast.

Fetching incremental stats is not applicable when using a
CatalogMetaProvider. By making fetch the default, it would
require that --pull_incremental_statistics is set to false
when enabling CatalogMetaProvider. This change makes
--use_local_catalog to take priority over --pull_incremental_statistics
so that when both are turned on, only the local catalog setting
is enabled.

Testing:
- manual testing
- moved the testing for pull incremental stats out of custom cluster
  tests since the default flipped
- added tests that run with local catalog and pulling incremental stats.

Change-Id: I5601a24f81bb3466cff5308c7093d2765bb1c481
Reviewed-on: http://gerrit.cloudera.org:8080/11677
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ad265842
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ad265842
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ad265842

Branch: refs/heads/master
Commit: ad265842b691927e1c204203390172a92dc38a68
Parents: ec2daba
Author: Vuk Ercegovac <ve...@cloudera.com>
Authored: Thu Oct 11 22:41:20 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue Oct 16 10:12:03 2018 +0000

----------------------------------------------------------------------
 be/src/common/global-flags.cc                   |  4 +-
 .../impala/analysis/ComputeStatsStmt.java       |  5 ++
 tests/custom_cluster/test_incremental_stats.py  | 46 +++++++++++
 tests/custom_cluster/test_pull_stats.py         | 84 --------------------
 tests/metadata/test_compute_stats.py            | 49 ++++++++++++
 5 files changed, 102 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ad265842/be/src/common/global-flags.cc
----------------------------------------------------------------------
diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc
index 2ea1ca5..d6f1d04 100644
--- a/be/src/common/global-flags.cc
+++ b/be/src/common/global-flags.cc
@@ -210,11 +210,11 @@ DEFINE_bool_hidden(disable_catalog_data_ops_debug_only, false,
 // same way, is error prone. One fix for this flag is to set it only on
 // catalogd, propagate the setting as a property of the Catalog object, and let
 // impalad uses act on this setting.
-DEFINE_bool(pull_incremental_statistics, false,
+DEFINE_bool(pull_incremental_statistics, true,
     "When set, impalad coordinators pull incremental statistics from catalogd on-demand "
     "and catalogd does not broadcast incremental statistics via statestored to "
     "coordinators. If used, the flag must be set on both catalogd and all impalad "
-    "coordinators.");
+    "coordinators. This feature should not be used when --use_local_catalog is true.");
 
 DEFINE_int32(invalidate_tables_timeout_s, 0, "If a table has not been referenced in a "
     "SQL statement for more than the configured amount of time, the catalog server will "

http://git-wip-us.apache.org/repos/asf/impala/blob/ad265842/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
index 24f387c..a85a946 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
@@ -615,7 +615,12 @@ public class ComputeStatsStmt extends StatementBase {
     int expectedNumStats = partitions.size() - excludedPartitions.size();
     Preconditions.checkArgument(expectedNumStats >= 0);
 
+    // Incremental stats are fetched only when configured to do so except
+    // when also using a local catalog or when testing. When using a local
+    // catalog, it makes more sense to use the getPartitions api which is
+    // designed to fetch specific fields and specific partitions.
     if (BackendConfig.INSTANCE.pullIncrementalStatistics()
+        && !BackendConfig.INSTANCE.getBackendCfg().use_local_catalog
         && !RuntimeEnv.INSTANCE.isTestEnv()) {
       // We're configured to fetch the statistics from catalogd, so collect the relevant
       // partition ids.

http://git-wip-us.apache.org/repos/asf/impala/blob/ad265842/tests/custom_cluster/test_incremental_stats.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_incremental_stats.py b/tests/custom_cluster/test_incremental_stats.py
new file mode 100644
index 0000000..e58d5fd
--- /dev/null
+++ b/tests/custom_cluster/test_incremental_stats.py
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+
+class TestIncrementalStatistics(CustomClusterTestSuite):
+  """Tests flag that controls pulling directly from catalogd using the
+  --pull_incremental_statistics flag."""
+
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(impalad_args="--pull_incremental_statistics=false",
+                                    catalogd_args="--pull_incremental_statistics=false")
+  def test_push_stats(self, vector, unique_database):
+    """
+    Tests compute incremental stats when incremental stats are pushed via statestore.
+    """
+    self.run_test_case('QueryTest/compute-stats-incremental', vector, unique_database)
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+    impalad_args="--pull_incremental_statistics=true --use_local_catalog=true",
+    catalogd_args="--pull_incremental_statistics=true --catalog_topic_mode=minimal")
+  def test_with_local_catalog(self, vector, unique_database):
+    """
+    Tests that when local catalog is used, the pull incremental stats flag has no effect.
+    """
+    self.run_test_case('QueryTest/compute-stats-incremental', vector, unique_database)

http://git-wip-us.apache.org/repos/asf/impala/blob/ad265842/tests/custom_cluster/test_pull_stats.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_pull_stats.py b/tests/custom_cluster/test_pull_stats.py
deleted file mode 100644
index b852f3d..0000000
--- a/tests/custom_cluster/test_pull_stats.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pytest
-from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-
-class TestPullStatistics(CustomClusterTestSuite):
-  """Tests incremental statistics when pulling directly from catalogd
-  using the --pull_incremental_statistics flag."""
-
-  @classmethod
-  def get_workload(self):
-    return 'functional-query'
-
-  @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args(impalad_args="--pull_incremental_statistics=true",
-                                    catalogd_args="--pull_incremental_statistics=true")
-  def test_pull_stats(self, vector, unique_database):
-    self.run_test_case('QueryTest/compute-stats-incremental', vector, unique_database)
-
-  @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args(impalad_args="--pull_incremental_statistics=true",
-                                    catalogd_args="--pull_incremental_statistics=true")
-  def test_pull_stats_profile(self, vector, unique_database):
-    """Checks that the frontend profile includes metrics when computing
-       incremental statistics.
-    """
-    try:
-      client = self.cluster.impalads[0].service.create_beeswax_client()
-      create = "create table test like functional.alltypes"
-      load = "insert into test partition(year, month) select * from functional.alltypes"
-      insert = """insert into test partition(year=2009, month=1) values
-                  (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
-                  "10/21/09","4","2009-10-21 03:24:09.600000000")"""
-      stats_all = "compute incremental stats test"
-      stats_part = "compute incremental stats test partition (year=2009,month=1)"
-
-      # Checks that profile does not have metrics for incremental stats when
-      # the operation is not 'compute incremental stats'.
-      self.execute_query_expect_success(client, "use %s" % unique_database)
-      profile = self.execute_query_expect_success(client, create).runtime_profile
-      assert profile.count("StatsFetch") == 0
-      # Checks that incremental stats metrics are present when 'compute incremental
-      # stats' is run. Since the table has no stats, expect that no bytes are fetched.
-      self.execute_query_expect_success(client, load)
-      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
-      assert profile.count("StatsFetch") > 1
-      assert profile.count("StatsFetch.CompressedBytes: 0") == 1
-      # Checks that bytes fetched is non-zero since incremental stats are present now
-      # and should have been fetched.
-      self.execute_query_expect_success(client, insert)
-      profile = self.execute_query_expect_success(client, stats_part).runtime_profile
-      assert profile.count("StatsFetch") > 1
-      assert profile.count("StatsFetch.CompressedBytes") == 1
-      assert profile.count("StatsFetch.CompressedBytes: 0") == 0
-      # Adds a partition, computes stats, and checks that the metrics in the profile
-      # reflect the operation.
-      alter = "alter table test add partition(year=2011, month=1)"
-      insert_new_partition = """
-          insert into test partition(year=2011, month=1) values
-          (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
-          "10/21/09","4","2009-10-21 03:24:09.600000000")
-          """
-      self.execute_query_expect_success(client, alter)
-      self.execute_query_expect_success(client, insert_new_partition)
-      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
-      assert profile.count("StatsFetch.TotalPartitions: 25") == 1
-      assert profile.count("StatsFetch.NumPartitionsWithStats: 24") == 1
-    finally:
-      client.close()

http://git-wip-us.apache.org/repos/asf/impala/blob/ad265842/tests/metadata/test_compute_stats.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py
index 3a13859..c3d182d 100644
--- a/tests/metadata/test_compute_stats.py
+++ b/tests/metadata/test_compute_stats.py
@@ -18,6 +18,7 @@
 import pytest
 from subprocess import check_call
 
+from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfS3, SkipIfADLS, SkipIfIsilon, SkipIfLocal
 from tests.common.test_dimensions import (
@@ -118,6 +119,54 @@ class TestComputeStats(ImpalaTestSuite):
     assert(len(show_result.data) == 2)
     assert("1\tpval\t8" in show_result.data[0])
 
+  def test_pull_stats_profile(self, vector, unique_database):
+    """Checks that the frontend profile includes metrics when computing
+       incremental statistics.
+    """
+    try:
+      client = ImpalaCluster().impalads[0].service.create_beeswax_client()
+      create = "create table test like functional.alltypes"
+      load = "insert into test partition(year, month) select * from functional.alltypes"
+      insert = """insert into test partition(year=2009, month=1) values
+                  (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
+                  "10/21/09","4","2009-10-21 03:24:09.600000000")"""
+      stats_all = "compute incremental stats test"
+      stats_part = "compute incremental stats test partition (year=2009,month=1)"
+
+      # Checks that profile does not have metrics for incremental stats when
+      # the operation is not 'compute incremental stats'.
+      self.execute_query_expect_success(client, "use %s" % unique_database)
+      profile = self.execute_query_expect_success(client, create).runtime_profile
+      assert profile.count("StatsFetch") == 0
+      # Checks that incremental stats metrics are present when 'compute incremental
+      # stats' is run. Since the table has no stats, expect that no bytes are fetched.
+      self.execute_query_expect_success(client, load)
+      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
+      assert profile.count("StatsFetch") > 1
+      assert profile.count("StatsFetch.CompressedBytes: 0") == 1
+      # Checks that bytes fetched is non-zero since incremental stats are present now
+      # and should have been fetched.
+      self.execute_query_expect_success(client, insert)
+      profile = self.execute_query_expect_success(client, stats_part).runtime_profile
+      assert profile.count("StatsFetch") > 1
+      assert profile.count("StatsFetch.CompressedBytes") == 1
+      assert profile.count("StatsFetch.CompressedBytes: 0") == 0
+      # Adds a partition, computes stats, and checks that the metrics in the profile
+      # reflect the operation.
+      alter = "alter table test add partition(year=2011, month=1)"
+      insert_new_partition = """
+          insert into test partition(year=2011, month=1) values
+          (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
+          "10/21/09","4","2009-10-21 03:24:09.600000000")
+          """
+      self.execute_query_expect_success(client, alter)
+      self.execute_query_expect_success(client, insert_new_partition)
+      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
+      assert profile.count("StatsFetch.TotalPartitions: 25") == 1
+      assert profile.count("StatsFetch.NumPartitionsWithStats: 24") == 1
+    finally:
+      client.close()
+
 # Tests compute stats on HBase tables. This test is separate from TestComputeStats,
 # because we want to use the existing machanism to disable running tests on hbase/none
 # based on the filesystem type (S3, Isilon, etc.).