You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/12/15 18:33:24 UTC
(impala) 01/02: Revert "Revert "IMPALA-9923: Load ORC serially to hack around ...""
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 378169be1f571f4d16db2d98b418903c8593889f
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Thu Dec 14 12:46:16 2023 -0800
Revert "Revert "IMPALA-9923: Load ORC serially to hack around ...""
This reverts commit b03e8ef95c856f499d17ea7815831e30e2e9f467.
IMPALA-12630 reports that several tests were broken by loading ORC in
parallel with other non-text table formats. ORC tables go back to being
loaded serially after this commit.
Change-Id: I5d3f2ee1c15f9aff6aa632a78d86ba32c640e53d
Reviewed-on: http://gerrit.cloudera.org:8080/20795
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/load-data.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/bin/load-data.py b/bin/load-data.py
index 090524cf5..a4cfd5a97 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -396,6 +396,7 @@ def main():
impala_create_files = []
hive_load_text_files = []
+ hive_load_orc_files = []
hive_load_nontext_files = []
hbase_create_files = []
hbase_postload_files = []
@@ -407,6 +408,8 @@ def main():
elif hive_load_match in filename:
if 'text-none-none' in filename:
hive_load_text_files.append(filename)
+ elif 'orc-def-block' in filename:
+ hive_load_orc_files.append(filename)
else:
hive_load_nontext_files.append(filename)
elif hbase_create_match in filename:
@@ -429,6 +432,7 @@ def main():
log_file_list("Impala Create Files:", impala_create_files)
log_file_list("Hive Load Text Files:", hive_load_text_files)
+ log_file_list("Hive Load Orc Files:", hive_load_orc_files)
log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files)
log_file_list("HBase Create Files:", hbase_create_files)
log_file_list("HBase Post-Load Files:", hbase_postload_files)
@@ -453,6 +457,13 @@ def main():
# need to be loaded first
assert(len(hive_load_text_files) <= 1)
hive_exec_query_files_parallel(thread_pool, hive_load_text_files)
+ # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks
+ # around flakiness seen when loading this in parallel. This should be removed as
+ # soon as possible.
+ assert(len(hive_load_orc_files) <= 1)
+ hive_exec_query_files_parallel(thread_pool, hive_load_orc_files)
+
+ # Load all non-text formats (goes parallel)
hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files)
assert(len(hbase_postload_files) <= 1)