Posted to commits@impala.apache.org by jo...@apache.org on 2020/08/06 00:07:17 UTC

[impala] branch master updated (cc1eddb -> dc2fdab)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from cc1eddb  Add logging when query unregisters
     new f38ca7d  IMPALA-10037: Remove flaky test_mt_dop_scan_node
     new dc2fdab  IMPALA-9923: Load ORC serially to hack around flakiness

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/load-data.py                | 11 +++++++++++
 tests/query_test/test_mt_dop.py | 43 +----------------------------------------
 2 files changed, 12 insertions(+), 42 deletions(-)


[impala] 01/02: IMPALA-10037: Remove flaky test_mt_dop_scan_node

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f38ca7dbbbbf8cf027fcaab4713a6b186b584cef
Author: Bikramjeet Vig <bi...@gmail.com>
AuthorDate: Tue Aug 4 17:14:37 2020 -0700

    IMPALA-10037: Remove flaky test_mt_dop_scan_node
    
    This test is inherently flaky because it relies on instances
    fetching scan ranges from a shared queue. Since the test was only a
    sanity check and its flakiness outweighed its usefulness, this patch
    removes it.
    
    Change-Id: I1625872189ea7ac2d4e4d035956f784b6e18eb08
    Reviewed-on: http://gerrit.cloudera.org:8080/16286
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_mt_dop.py | 43 +----------------------------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/tests/query_test/test_mt_dop.py b/tests/query_test/test_mt_dop.py
index 8af3fa8..4f5b50d 100644
--- a/tests/query_test/test_mt_dop.py
+++ b/tests/query_test/test_mt_dop.py
@@ -37,6 +37,7 @@ WAIT_TIME_MS = build_flavor_timeout(60000, slow_build_timeout=100000)
 # the value 0 to cover the non-MT path as well.
 MT_DOP_VALUES = [0, 1, 2, 8]
 
+
 class TestMtDop(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
@@ -97,48 +98,6 @@ class TestMtDop(ImpalaTestSuite):
     assert expected_results in results.data
 
 
-class TestMtDopScanNode(ImpalaTestSuite):
-  @classmethod
-  def get_workload(self):
-    return 'functional-query'
-
-  @classmethod
-  def add_test_dimensions(cls):
-    super(TestMtDopScanNode, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(
-      lambda v: v.get_value('table_format').file_format == 'text' and v.get_value(
-        'table_format').compression_codec == 'none')
-
-  def test_mt_dop_scan_node(self, vector, unique_database):
-    """Regression test to make sure scan ranges are shared among all scan node instances
-    when using mt_dop. This runs a selective hash join that will dynamically prune
-    partitions leaving less than 5% of the data. Before IMPALA-9655 this would almost
-    always result in a failure where at least one instance would have all its statically
-    assigned scan ranges pruned."""
-    fq_table_name = "%s.store_sales_subset" % unique_database
-    self.execute_query("create table %s as select distinct(ss_sold_date_sk) as "
-                       "sold_date from tpcds.store_sales limit 50" % fq_table_name)
-    vector.get_value('exec_option')['mt_dop'] = 8
-    vector.get_value('exec_option')['runtime_filter_wait_time_ms'] = 100000
-
-    # Since this depends on instances fetching scan ranges from a shared queue, running
-    # it multiple times ensures any flakiness is removed. On a release build it has a
-    # 0.05% failure rate.
-    NUM_TRIES = 100
-    failed_count = 0
-    for i in xrange(NUM_TRIES):
-      try:
-        result = self.execute_query(
-          "select count(ss_sold_date_sk) from tpcds.store_sales, %s where "
-          "ss_sold_date_sk = sold_date" % fq_table_name,
-          vector.get_value('exec_option'))
-        assert "- BytesRead: 0" not in result.runtime_profile, result.runtime_profile
-        break
-      except Exception:
-        failed_count += 1
-        if i == NUM_TRIES - 1: raise
-    LOG.info("Num of times failed before success {0}".format(failed_count))
-
 class TestMtDopParquet(ImpalaTestSuite):
   @classmethod
   def get_workload(cls):

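For reference, the retry-and-check pattern the removed test relied on reduces to
the minimal Python sketch below. The run_query callable is a hypothetical
stand-in for ImpalaTestSuite.execute_query that returns a runtime profile
string; the sketch is illustrative only and is not part of the Impala test
framework.

import logging

LOG = logging.getLogger(__name__)


def assert_eventually_reads_bytes(run_query, num_tries=100):
    """Retry a flaky profile check and raise only if every attempt fails.

    Mirrors the loop in the removed test_mt_dop_scan_node: each attempt runs
    the query and asserts that no scan node instance reported zero bytes read.
    """
    failed_count = 0
    for i in range(num_tries):
        try:
            profile = run_query()
            assert "- BytesRead: 0" not in profile, profile
            break
        except AssertionError:
            failed_count += 1
            if i == num_tries - 1:
                raise
    LOG.info("Attempts failed before success: %d", failed_count)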

[impala] 02/02: IMPALA-9923: Load ORC serially to hack around flakiness

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dc2fdabbd1f2c930348671e17f885c5c54b628e4
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Tue Aug 4 22:08:22 2020 -0700

    IMPALA-9923: Load ORC serially to hack around flakiness
    
    ORC dataload has been intermittently failing with
    "Fail to get checksum, since file .../_orc_acid_version is under construction."
    The failure appears to be caused by a Hive/HDFS interaction that
    gets worse with parallelism.
    
    This has been hitting a lot of developer tests. As a temporary
    workaround, this changes dataload to load ORC serially. This is
    slightly slower, but it should be more reliable.
    
    Testing:
     - Ran precommit tests, manually verified dataload logs
    
    Change-Id: I15eff1ec6cab32c1216ed7400e4c4b57bb81e4cd
    Reviewed-on: http://gerrit.cloudera.org:8080/16292
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/load-data.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bin/load-data.py b/bin/load-data.py
index b461d7a..a7eb883 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -415,6 +415,7 @@ def main():
 
     impala_create_files = []
     hive_load_text_files = []
+    hive_load_orc_files = []
     hive_load_nontext_files = []
     hbase_create_files = []
     hbase_postload_files = []
@@ -426,6 +427,8 @@ def main():
       elif hive_load_match in filename:
         if 'text-none-none' in filename:
           hive_load_text_files.append(filename)
+        elif 'orc-def-block' in filename:
+          hive_load_orc_files.append(filename)
         else:
           hive_load_nontext_files.append(filename)
       elif hbase_create_match in filename:
@@ -448,6 +451,7 @@ def main():
 
     log_file_list("Impala Create Files:", impala_create_files)
     log_file_list("Hive Load Text Files:", hive_load_text_files)
+    log_file_list("Hive Load Orc Files:", hive_load_orc_files)
     log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files)
     log_file_list("HBase Create Files:", hbase_create_files)
     log_file_list("HBase Post-Load Files:", hbase_postload_files)
@@ -472,6 +476,13 @@ def main():
     # need to be loaded first
     assert(len(hive_load_text_files) <= 1)
     hive_exec_query_files_parallel(thread_pool, hive_load_text_files)
+    # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks
+    # around flakiness seen when loading this in parallel. This should be removed as
+    # soon as possible.
+    assert(len(hive_load_orc_files) <= 1)
+    hive_exec_query_files_parallel(thread_pool, hive_load_orc_files)
+
+    # Load all non-text formats (goes parallel)
     hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files)
 
     assert(len(hbase_postload_files) <= 1)
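
For reference, the file bucketing this patch adds to bin/load-data.py can be
summarized in the minimal Python sketch below. The dispatch callable is a
hypothetical stand-in for hive_exec_query_files_parallel; the filename markers
('text-none-none', 'orc-def-block') match the diff above.

def bucket_hive_load_files(filenames):
    """Split Hive load scripts into text, ORC, and other-format buckets."""
    text_files, orc_files, nontext_files = [], [], []
    for filename in filenames:
        if 'text-none-none' in filename:
            text_files.append(filename)
        elif 'orc-def-block' in filename:
            # IMPALA-9923: ORC gets its own bucket so it can be loaded in a
            # separate step, working around flakiness seen when it is loaded
            # alongside the other non-text formats.
            orc_files.append(filename)
        else:
            nontext_files.append(filename)
    return text_files, orc_files, nontext_files


def load_hive_files_in_order(filenames, dispatch):
    """Dispatch text first, then ORC by itself, then the remaining formats."""
    text_files, orc_files, nontext_files = bucket_hive_load_files(filenames)
    dispatch(text_files)     # text loads first; other formats are derived from it
    dispatch(orc_files)      # ORC in its own step (the IMPALA-9923 workaround)
    dispatch(nontext_files)  # remaining non-text formats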