Posted to commits@impala.apache.org by st...@apache.org on 2021/03/26 03:15:43 UTC

[impala] branch master updated (e3bafcb -> 0b79464)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from e3bafcb  IMPALA-10590: Introduce admission service heartbeat mechanism
     new 5b27b7c  IMPALA-10598: Deflake test_cache_reload_validation
     new 281a47c  IMPALA-10564 (part 2): Fixed test_ctas_exprs failure for S3 build
     new 0b79464  IMPALA-10397: Fix test_single_workload

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tests/custom_cluster/test_auto_scaling.py | 13 ++++++-------
 tests/query_test/test_decimal_queries.py  | 30 +++++++++++++++++++++---------
 tests/query_test/test_hdfs_caching.py     | 18 ++++++++++++++++--
 3 files changed, 43 insertions(+), 18 deletions(-)

[impala] 01/03: IMPALA-10598: Deflake test_cache_reload_validation

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5b27b7ca7232a17d2a099f8567553004248989f2
Author: Vihang Karajgaonkar <vi...@apache.org>
AuthorDate: Fri Mar 19 14:00:19 2021 -0700

    IMPALA-10598: Deflake test_cache_reload_validation
    
    This patch deflakes the e2e test test_cache_reload_validation in
    test_hdfs_caching.py. The util method the test relies on to count
    cache directives, by parsing the output of the command
    "hdfs cacheadmin -listDirectives -stats", does not account for
    trailing newlines or headers in that output. Hence the test fails
    because the expected number of cache directives does not match the
    number of lines in the output.
    
    The fix parses the line "Found <int> entries" in the output when it
    is available and returns the count from that line. If the line is
    not found, it falls back to the earlier implementation of counting
    the number of lines.
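    For illustration, a minimal standalone sketch of the new counting
    logic (the sample output here is hypothetical; real output comes
    from "hdfs cacheadmin -listDirectives -stats"):

      import re

      def count_directives(stdout):
        # Prefer the summary line "Found <int> entries" when present.
        for line in stdout.split('\n'):
          if line.startswith("Found "):
            return int(re.search(r'\d+', line).group())
        # Fall back to the old behavior: count the raw output lines,
        # which over-counts when headers or trailing newlines appear.
        return len(stdout.split('\n'))

      sample = "Found 2 entries\n 1 testPool ...\n 2 testPool ...\n\n"
      assert count_directives(sample) == 2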
    
    Testing:
    1. The test was failing for me when run individually. After the patch, I looped
    the test 10 times without any errors.
    
    Change-Id: I2d491e90af461d5db3575a5840958d17ca90901c
    Reviewed-on: http://gerrit.cloudera.org:8080/17210
    Reviewed-by: Vihang Karajgaonkar <vi...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_hdfs_caching.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py
index 6a7eda3..0815e58 100644
--- a/tests/query_test/test_hdfs_caching.py
+++ b/tests/query_test/test_hdfs_caching.py
@@ -345,13 +345,27 @@ def get_num_cache_requests():
   def get_num_cache_requests_util():
     rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats")
     assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr)
-    return len(stdout.split('\n'))
+    # remove blank new lines from output count
+    lines = [line for line in stdout.split('\n') if line.strip()]
+    count = None
+    for line in lines:
+      if line.startswith("Found "):
+        # the line should say "Found <int> entries"
+        # if we find this line we parse the number of entries
+        # from this line.
+        count = int(re.search(r'\d+', line).group())
+        break
+    # if count is available we return it else we just
+    # return the total number of lines
+    if count is not None:
+      return count
+    else:
+      return len(stdout.split('\n'))
 
   # IMPALA-3040: This can take time, especially under slow builds like ASAN.
   wait_time_in_sec = build_flavor_timeout(5, slow_build_timeout=20)
   num_stabilization_attempts = 0
   max_num_stabilization_attempts = 10
-  new_requests = None
   num_requests = None
   LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
   while num_stabilization_attempts < max_num_stabilization_attempts:

[impala] 02/03: IMPALA-10564 (part 2): Fixed test_ctas_exprs failure for S3 build

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 281a47caad1c7c53033ee1ce0affa35b8fd4d2d7
Author: wzhou-code <wz...@cloudera.com>
AuthorDate: Wed Mar 24 11:44:21 2021 -0700

    IMPALA-10564 (part 2): Fixed test_ctas_exprs failure for S3 build
    
    The new test case TestDecimalOverflowExprs::test_ctas_exprs was
    added in the first patch for IMPALA-10564, but it failed in the S3
    build with Parquet format since the table is not successfully
    created when the CTAS query fails.
    This patch fixes the test failure by skipping the check that no
    NULL was inserted into the table after a failed CTAS, for S3 builds
    with Parquet.
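    For context, the overflow can be reproduced with plain Python
    Decimal arithmetic (illustrative only; this is not Impala's decimal
    implementation). A decimal(28,10) value has at most 18 digits
    before the decimal point, and cubing the value the test inserts
    exceeds that:

      from decimal import Decimal, getcontext

      getcontext().prec = 50  # enough precision to see the full product
      d = Decimal("654964569154.9565")  # fits decimal(28,10): 12 integer digits
      cube = d * d * d
      # cube is ~2.8e35, i.e. 36 integer digits > 18, so
      # cast(d_28*d_28*d_28 as decimal(28,10)) overflows.
      print(cube)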
    
    Testing:
     - Reproduced the test failure on a local box with defaultFS set to s3a.
       Verified the fix works with defaultFS set to s3a.
     - Passed EE_TEST.
    
    Change-Id: Ia627ca70ed41764e86be348a0bc19e330b3334d2
    Reviewed-on: http://gerrit.cloudera.org:8080/17228
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_decimal_queries.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tests/query_test/test_decimal_queries.py b/tests/query_test/test_decimal_queries.py
index 2dde7a4..62b5f77 100644
--- a/tests/query_test/test_decimal_queries.py
+++ b/tests/query_test/test_decimal_queries.py
@@ -25,6 +25,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_dimensions import (create_exec_option_dimension_from_dict,
     create_client_protocol_dimension, hs2_parquet_constraint)
 from tests.common.test_vector import ImpalaTestDimension
+from tests.util.filesystem_utils import IS_S3
 
 class TestDecimalQueries(ImpalaTestSuite):
   @classmethod
@@ -131,6 +132,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
   def test_ctas_exprs(self, vector, unique_database):
     TBL_NAME_1 = '`{0}`.`overflowed_decimal_tbl_1`'.format(unique_database)
     TBL_NAME_2 = '`{0}`.`overflowed_decimal_tbl_2`'.format(unique_database)
+    TBL_NAME_3 = '`{0}`.`overflowed_decimal_tbl_3`'.format(unique_database)
     if 'parquet' in str(vector.get_value('table_format')):
       stmt_1 = "CREATE TABLE {0} STORED AS PARQUET " \
           "AS SELECT 1 as i, cast(a*a*a as decimal (28,10)) as d_28 FROM " \
@@ -138,6 +140,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
       stmt_2 = "CREATE TABLE {0} STORED AS PARQUET " \
           "AS SELECT i, cast(d_28*d_28*d_28 as decimal (28,10)) as d_28 FROM {1} " \
           "WHERE d_28 is not null"
+      stmt_3 = "CREATE TABLE {0} (i int, d_28 decimal(28,10)) STORED AS PARQUET"
     elif 'kudu' in str(vector.get_value('table_format')):
       stmt_1 = "CREATE TABLE {0} PRIMARY KEY (i) STORED AS KUDU " \
           "AS SELECT 1 as i, cast(a*a*a as decimal (28,10)) as d_28 FROM " \
@@ -145,6 +148,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
       stmt_2 = "CREATE TABLE {0} PRIMARY KEY (i) STORED AS KUDU " \
           "AS SELECT i, cast(d_28*d_28*d_28 as decimal (28,10)) as d_28 FROM {1} " \
           "WHERE d_28 is not null"
+      stmt_3 = "CREATE TABLE {0} (i int primary key, d_28 decimal(28,10)) STORED AS KUDU"
     else:
       stmt_1 = "CREATE TABLE {0} " \
           "AS SELECT 1 as i, cast(a*a*a as decimal (28,10)) as d_28 FROM " \
@@ -152,9 +156,11 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
       stmt_2 = "CREATE TABLE {0} " \
           "AS SELECT i, cast(d_28*d_28*d_28 as decimal (28,10)) as d_28 FROM {1} " \
           "WHERE d_28 is not null"
+      stmt_3 = "CREATE TABLE {0} (i int, d_28 decimal(28,10))"
     query_1 = stmt_1.format(TBL_NAME_1)
     # CTAS with selection from another table.
-    query_2 = stmt_2.format(TBL_NAME_2, TBL_NAME_1)
+    query_2 = stmt_2.format(TBL_NAME_2, TBL_NAME_3)
+    query_3 = stmt_3.format(TBL_NAME_3)
 
     # Query_1 is aborted with error message "Decimal expression overflowed" and NULL is
     # not inserted into table.
@@ -166,14 +172,18 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
     except ImpalaBeeswaxException, e:
       assert "Decimal expression overflowed" in str(e)
 
-    result = self.execute_query_expect_success(self.client,
-        "SELECT count(*) FROM %s" % TBL_NAME_1)
-    assert int(result.get_data()) == 0
+    # TODO (IMPALA-10607): Following query failed for S3 build with Parquet table format.
+    if not ('parquet' in str(vector.get_value('table_format')) and IS_S3):
+      result = self.execute_query_expect_success(self.client,
+          "SELECT count(*) FROM %s" % TBL_NAME_1)
+      assert int(result.get_data()) == 0
 
-    # Insert data to table 1.
+    # Create table 3 and insert data to table 3.
+    self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % TBL_NAME_3)
+    self.execute_query_expect_success(self.client, query_3)
     self.execute_query_expect_success(self.client,
         "INSERT INTO TABLE %s VALUES(100, cast(654964569154.9565 as decimal (28,10)))" %
-        TBL_NAME_1)
+        TBL_NAME_3)
     # Query_2 is aborted with error message "Decimal expression overflowed" and NULL is
     # not inserted into table.
     self.execute_query_expect_success(self.client, "DROP TABLE IF EXISTS %s" % TBL_NAME_2)
@@ -183,6 +193,8 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
     except ImpalaBeeswaxException, e:
       assert "Decimal expression overflowed" in str(e)
 
-    result = self.execute_query_expect_success(self.client,
-        "SELECT count(*) FROM %s" % TBL_NAME_2)
-    assert int(result.get_data()) == 0
+    # TODO (IMPALA-10607): Following query failed for S3 build with Parquet table format.
+    if not ('parquet' in str(vector.get_value('table_format')) and IS_S3):
+      result = self.execute_query_expect_success(self.client,
+          "SELECT count(*) FROM %s" % TBL_NAME_2)
+      assert int(result.get_data()) == 0

[impala] 03/03: IMPALA-10397: Fix test_single_workload

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0b79464d9c74d3cc89230a5a3ec3c3955ea2a953
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Tue Mar 23 12:51:44 2021 -0700

    IMPALA-10397: Fix test_single_workload
    
    The logs on failed runs indicated that the autoscaler never started
    another cluster. This can only happen if it never notices a queued
    query, which is plausible since this test was only failing in
    release builds, where the short sample query can complete before
    any queuing is observed. This patch increases the runtime of the
    sample query to make execution more predictable.
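    For reference, the polling idiom the test uses (visible in the diff
    below) can be read as this minimal standalone helper (a sketch; the
    wait_until name is hypothetical):

      from time import sleep

      def wait_until(condition, timeout_s):
        # any() short-circuits as soon as condition() is true; otherwise
        # sleep(1) returns None (falsy) and the generator keeps going,
        # so this polls once per second for up to timeout_s seconds.
        return any(condition() or sleep(1) for _ in range(timeout_s))

      # e.g.: wait_until(lambda: workload.get_query_rate() >
      #                  single_group_query_rate, STATE_CHANGE_TIMEOUT_S)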
    
    Testing:
    Looped the test locally on a release build.
    
    Change-Id: Ide3c7fb4509ce9a797b4cbdd141b2a319b923d4e
    Reviewed-on: http://gerrit.cloudera.org:8080/17218
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/custom_cluster/test_auto_scaling.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tests/custom_cluster/test_auto_scaling.py b/tests/custom_cluster/test_auto_scaling.py
index bbb7dd0..c2863ca 100644
--- a/tests/custom_cluster/test_auto_scaling.py
+++ b/tests/custom_cluster/test_auto_scaling.py
@@ -46,9 +46,9 @@ class TestAutoScaling(CustomClusterTestSuite):
   STATE_CHANGE_TIMEOUT_S = 60
   # This query will scan two partitions (month = 1, 2) and thus will have 1 fragment
   # instance per executor on groups of size 2. Each partition has 2 rows, so it performs
-  # two comparisons and should take around 1 second to complete.
+  # two comparisons and should take around 2 seconds to complete.
   QUERY = """select * from functional_parquet.alltypestiny where month < 3
-             and id + random() < sleep(500)"""
+             and id + random() < sleep(1000)"""
 
   def _get_total_admitted_queries(self):
     admitted_queries = self.impalad_test_service.get_total_admitted_queries(
@@ -96,7 +96,7 @@ class TestAutoScaling(CustomClusterTestSuite):
       assert any(self._get_total_admitted_queries() >= 10 or sleep(1)
                  for _ in range(self.STATE_CHANGE_TIMEOUT_S)), \
           "Did not admit enough queries within %s s" % self.STATE_CHANGE_TIMEOUT_S
-
+      single_group_query_rate = workload.get_query_rate()
       # Wait for second executor group to start
       cluster_size = (2 * GROUP_SIZE) + 1
       assert any(self._get_num_backends() >= cluster_size or sleep(1)
@@ -109,7 +109,6 @@ class TestAutoScaling(CustomClusterTestSuite):
       # Wait for query rate to exceed the maximum for a single executor group. In the past
       # we tried to wait for it to pass a higher threshold but on some platforms we saw
       # that it was too flaky.
-      min_query_rate = EXECUTOR_SLOTS
       max_query_rate = 0
       # This barrier has been flaky in the past so we wait 2x as long as for the other
       # checks.
@@ -118,12 +117,12 @@ class TestAutoScaling(CustomClusterTestSuite):
         current_rate = workload.get_query_rate()
         LOG.info("Current rate: %s" % current_rate)
         max_query_rate = max(max_query_rate, current_rate)
-        if max_query_rate > min_query_rate:
+        if max_query_rate > single_group_query_rate:
           break
         sleep(1)
 
-      assert max_query_rate >= min_query_rate, "Query rate did not reach %s within %s " \
-          "s. Maximum was %s. Cluster size is %s." % (min_query_rate,
+      assert max_query_rate > single_group_query_rate, "Query rate did not exceed %s " \
+          "within %s s. Maximum was %s. Cluster size is %s." % (single_group_query_rate,
           self.STATE_CHANGE_TIMEOUT_S, max_query_rate, cluster_size)
 
       LOG.info("Stopping workload")