You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/12/13 06:52:05 UTC
(impala) 02/02: Revert "IMPALA-9923: Load ORC serially to hack around flakiness"
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit b03e8ef95c856f499d17ea7815831e30e2e9f467
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Nov 29 18:58:37 2023 -0800
Revert "IMPALA-9923: Load ORC serially to hack around flakiness"
This reverts commit dc2fdabbd1f2c930348671e17f885c5c54b628e4.
Newer Hive versions and other fixes now allow ORC loading to happen in
parallel.
Change-Id: I67f4051dd07273f2b51843cb5c1ec2cf185c5924
Reviewed-on: http://gerrit.cloudera.org:8080/20755
Reviewed-by: Riza Suminto <ri...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/load-data.py | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/bin/load-data.py b/bin/load-data.py
index a4cfd5a97..090524cf5 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -396,7 +396,6 @@ def main():
impala_create_files = []
hive_load_text_files = []
- hive_load_orc_files = []
hive_load_nontext_files = []
hbase_create_files = []
hbase_postload_files = []
@@ -408,8 +407,6 @@ def main():
elif hive_load_match in filename:
if 'text-none-none' in filename:
hive_load_text_files.append(filename)
- elif 'orc-def-block' in filename:
- hive_load_orc_files.append(filename)
else:
hive_load_nontext_files.append(filename)
elif hbase_create_match in filename:
@@ -432,7 +429,6 @@ def main():
log_file_list("Impala Create Files:", impala_create_files)
log_file_list("Hive Load Text Files:", hive_load_text_files)
- log_file_list("Hive Load Orc Files:", hive_load_orc_files)
log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files)
log_file_list("HBase Create Files:", hbase_create_files)
log_file_list("HBase Post-Load Files:", hbase_postload_files)
@@ -457,13 +453,6 @@ def main():
# need to be loaded first
assert(len(hive_load_text_files) <= 1)
hive_exec_query_files_parallel(thread_pool, hive_load_text_files)
- # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks
- # around flakiness seen when loading this in parallel. This should be removed as
- # soon as possible.
- assert(len(hive_load_orc_files) <= 1)
- hive_exec_query_files_parallel(thread_pool, hive_load_orc_files)
-
- # Load all non-text formats (goes parallel)
hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files)
assert(len(hbase_postload_files) <= 1)