You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/07/18 17:57:21 UTC

[impala] 01/02: IMPALA-8750: Fix profile observability tests

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7364912088f66d2fb329d40b884763d1d1d63619
Author: Tamas Mate <tm...@cloudera.com>
AuthorDate: Fri Jul 12 19:11:43 2019 +0200

    IMPALA-8750: Fix profile observability tests
    
    IMPALA-8443 adds a test 'test_query_profile_contains_query_compilation_events'
    which can fail because there are two parts of the 'Query Compilation' events
    that can change based on the build environment.
    1) Whether the metadata is cached or not before the test starts.
    2) Whether 'lineage_event_log_dir' is configured on the cluster.
    
    This change covers these scenarios by splitting the tests into separate ones
    where the catalog cache is pre-evicted/pre-loaded and taking into consideration
    the current cluster configuration.
    
    Change-Id: I65a1e81870e808f0e261f8a6097efdcc6903912a
    Reviewed-on: http://gerrit.cloudera.org:8080/13851
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/common/impala_service.py         |  9 ++++++
 tests/query_test/test_observability.py | 54 ++++++++++++++++++++++++++++++----
 2 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index 7697724..e5aca37 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -84,6 +84,15 @@ class BaseImpalaService(object):
     return [metrics.get(metric_name, default_value)
             for metric_name, default_value in zip(metric_names, default_values)]
 
+  def get_flag_current_value(self, flag):
+    """Returns the value of the given flag name from the Impala /varz debug webpage.
+    If the flag does not exist it returns None."""
+    varz = json.loads(self.read_debug_webpage("varz?json"))
+    for var in varz.get("flags"):
+      if var["name"] == flag:
+        return var["current"]
+    return None
+
   def wait_for_metric_value(self, metric_name, expected_value, timeout=10, interval=1):
     start_time = time()
     while (time() - start_time < timeout):
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 6f397df..3d12033 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -279,18 +279,60 @@ class TestObservability(ImpalaTestSuite):
     assert results.runtime_profile.count("AGGREGATION_NODE") == 2
     assert results.runtime_profile.count("PLAN_ROOT_SINK") == 2
 
-  def test_query_profile_contains_query_compilation_events(self):
-    """Test that the expected events show up in a query profile.
-       If the table metadata is not cached this test will fail, as the metadata load
-       creates lines dynamically."""
-    event_regexes = [r'Query Compilation:',
-        r'Metadata of all .* tables cached:',
+  def test_query_profile_contains_query_compilation_static_events(self):
+    """Test that the expected events show up in a query profile. These lines are static
+    and should appear in this exact order."""
+    event_regexes = [
         r'Analysis finished:',
         r'Authorization finished (.*):',
         r'Value transfer graph computed:',
         r'Single node plan created:',
         r'Runtime filters computed:',
+        r'Distributed plan created:']
+    query = "select * from functional.alltypes"
+    runtime_profile = self.execute_query(query).runtime_profile
+    self.__verify_profile_event_sequence(event_regexes, runtime_profile)
+
+  def test_query_profile_contains_query_compilation_metadata_load_events(self):
+    """Test that the Metadata load started and finished events appear in the query
+    profile when Catalog cache is evicted."""
+    invalidate_query = "invalidate metadata functional.alltypes"
+    select_query = "select * from functional.alltypes"
+    self.execute_query(invalidate_query).runtime_profile
+    runtime_profile = self.execute_query(select_query).runtime_profile
+    event_regexes = [r'Query Compilation:',
+        r'Metadata load started:',
+        r'Metadata load finished. loaded-tables=.*/.* load-requests=.* '
+            r'catalog-updates=.*:',
+        r'Analysis finished:']
+    self.__verify_profile_event_sequence(event_regexes, runtime_profile)
+
+  def test_query_profile_contains_query_compilation_metadata_cached_event(self):
+    """Test that the Metadata cache available event appears in the query profile when
+    the table is cached."""
+    refresh_query = "refresh functional.alltypes"
+    select_query = "select * from functional.alltypes"
+    self.execute_query(refresh_query).runtime_profile
+    runtime_profile = self.execute_query(select_query).runtime_profile
+    event_regexes = [r'Query Compilation:',
+        r'Metadata of all .* tables cached:',
+        r'Analysis finished:']
+    self.__verify_profile_event_sequence(event_regexes, runtime_profile)
+
+  def test_query_profile_contains_query_compilation_lineage_event(self):
+    """Test that the lineage information appears in the profile in the right place. This
+    event depends on whether the lineage_event_log_dir is configured."""
+    impalad = self.impalad_test_service
+    lineage_event_log_dir_value = impalad.get_flag_current_value("lineage_event_log_dir")
+    assert lineage_event_log_dir_value is not None
+    if lineage_event_log_dir_value == "":
+      event_regexes = [
+        r'Distributed plan created:',
+        r'Planning finished:']
+    else:
+      event_regexes = [
         r'Distributed plan created:',
+        r'Lineage info computed:',
         r'Planning finished:']
     query = "select * from functional.alltypes"
     runtime_profile = self.execute_query(query).runtime_profile