You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/12/13 06:52:05 UTC

(impala) 02/02: Revert "IMPALA-9923: Load ORC serially to hack around flakiness"

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b03e8ef95c856f499d17ea7815831e30e2e9f467
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Nov 29 18:58:37 2023 -0800

    Revert "IMPALA-9923: Load ORC serially to hack around flakiness"
    
    This reverts commit dc2fdabbd1f2c930348671e17f885c5c54b628e4.
    
    Newer Hive versions and other fixes now allow ORC loading to happen in
    parallel.
    
    Change-Id: I67f4051dd07273f2b51843cb5c1ec2cf185c5924
    Reviewed-on: http://gerrit.cloudera.org:8080/20755
    Reviewed-by: Riza Suminto <ri...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/load-data.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/bin/load-data.py b/bin/load-data.py
index a4cfd5a97..090524cf5 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -396,7 +396,6 @@ def main():
 
     impala_create_files = []
     hive_load_text_files = []
-    hive_load_orc_files = []
     hive_load_nontext_files = []
     hbase_create_files = []
     hbase_postload_files = []
@@ -408,8 +407,6 @@ def main():
       elif hive_load_match in filename:
         if 'text-none-none' in filename:
           hive_load_text_files.append(filename)
-        elif 'orc-def-block' in filename:
-          hive_load_orc_files.append(filename)
         else:
           hive_load_nontext_files.append(filename)
       elif hbase_create_match in filename:
@@ -432,7 +429,6 @@ def main():
 
     log_file_list("Impala Create Files:", impala_create_files)
     log_file_list("Hive Load Text Files:", hive_load_text_files)
-    log_file_list("Hive Load Orc Files:", hive_load_orc_files)
     log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files)
     log_file_list("HBase Create Files:", hbase_create_files)
     log_file_list("HBase Post-Load Files:", hbase_postload_files)
@@ -457,13 +453,6 @@ def main():
     # need to be loaded first
     assert(len(hive_load_text_files) <= 1)
     hive_exec_query_files_parallel(thread_pool, hive_load_text_files)
-    # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks
-    # around flakiness seen when loading this in parallel. This should be removed as
-    # soon as possible.
-    assert(len(hive_load_orc_files) <= 1)
-    hive_exec_query_files_parallel(thread_pool, hive_load_orc_files)
-
-    # Load all non-text formats (goes parallel)
     hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files)
 
     assert(len(hbase_postload_files) <= 1)