You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/09/19 21:11:44 UTC
[impala] branch master updated (5fd4e6a11 -> 19114c720)
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
from 5fd4e6a11 IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED AS ICEBERG
new 17ee89f3d IMPALA-11573: Certain methods used by the replanning feature can be improved
new 190b5e41b IMPALA-11572: deflake test_mt_dop_skew_lpt
new 19114c720 IMPALA-11578: Exclude locality test for remote FS
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../java/org/apache/impala/service/Frontend.java | 19 ++++++-----
tests/common/skip.py | 4 +++
tests/custom_cluster/test_scheduler_locality.py | 2 ++
tests/query_test/test_scanners.py | 39 +++++++++++++---------
4 files changed, 40 insertions(+), 24 deletions(-)
[impala] 02/03: IMPALA-11572: deflake test_mt_dop_skew_lpt
Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 190b5e41b1e9d24a91432cc470c91e6fff84a041
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Mon Sep 12 15:49:46 2022 +0200
IMPALA-11572: deflake test_mt_dop_skew_lpt
test_mt_dop_skew_lpt was flaky. Also, it calculated the
min(bytes_read) / max(bytes_read) globally across all fragment
instances, not just among the intra-node fragment instances.
To deflake the test, this patch:
* calculates intra-node min(bytes_read) / max(bytes_read) ratios
instead of global ones
* prints out the ratios so we'll know the numbers when the test fails
* eliminates the compression codec test dimension, which is not used anyway
Change-Id: I823542c21fe8f10f43a501fe4175da883eaf2f99
Reviewed-on: http://gerrit.cloudera.org:8080/18970
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/query_test/test_scanners.py | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 45865bee6..f49ebbd53 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -375,7 +375,8 @@ class TestHdfsScannerSkew(ImpalaTestSuite):
def add_test_dimensions(cls):
super(TestHdfsScannerSkew, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_constraint(lambda v:
- v.get_value('table_format').file_format in ('text'))
+ v.get_value('table_format').file_format in ('text') and
+ v.get_value('table_format').compression_codec == 'none')
@SkipIfLocal.multiple_impalad
def test_mt_dop_skew_lpt(self, vector, unique_database):
@@ -384,32 +385,41 @@ class TestHdfsScannerSkew(ImpalaTestSuite):
load balancing with a shared queue between the instances. With IMPALA-11539
the items in the queue are ordered by scan sizes from largest to smallest, i.e.
we are doing Longest-Processing Time (LPT) scheduling."""
- def bytes_read_statistics(profile):
+ def count_intra_node_skew(profile):
+ SKEW_THRESHOLD = 0.85
lines = [line.strip() for line in profile.splitlines() if "- BytesRead: " in line]
assert len(lines) == 7 # Averaged fragment + 6 fragment
- min = None
- max = None
+ bytes_read_array = []
for i in range(1, len(lines)):
# A line looks like:
# - BytesRead: 202.77 MB (212617555)
# we only need '212617555' from it
bytes_read_str = re.findall(r'\((\d+)\)', lines[i])[0]
bytes_read = int(bytes_read_str)
- if min is None and max is None:
- min = max = bytes_read
- continue
- if bytes_read < min: min = bytes_read
- if bytes_read > max: max = bytes_read
- return [min, max]
+ bytes_read_array.append(bytes_read)
+ count_skew = 0
+ # MT_DOP fragments are next to each other in the profile, so fragment instances
+ # belonging to a single executor starts at 0, 2, 4
+ for i in [0, 2, 4]:
+ a = bytes_read_array[i]
+ b = bytes_read_array[i + 1]
+ if a < b:
+ ratio = float(a) / float(b)
+ else:
+ ratio = float(b) / float(a)
+ print "Intra-node bytes read ratio:", ratio
+ if ratio < SKEW_THRESHOLD:
+ count_skew += 1
+ return count_skew
tbl_name = unique_database + ".lineitem_skew"
with self.create_impala_client() as imp_client:
imp_client.set_configuration_option('mt_dop', '2')
imp_client.execute("""create table {} like tpch.lineitem""".format(tbl_name))
# Create a couple of small data files
- for i in range(1, 5):
+ for i in range(1, 11):
imp_client.execute("""insert into {} select * from tpch.lineitem
- where l_orderkey % 5 = 0""".format(tbl_name))
+ where l_orderkey % 11 = 0""".format(tbl_name))
# Create a couple of large files
imp_client.execute("insert into {} select * from tpch.lineitem".format(tbl_name))
@@ -423,9 +433,8 @@ class TestHdfsScannerSkew(ImpalaTestSuite):
min(l_receiptdate),min(l_shipinstruct),min(l_shipmode),min(l_comment)
from {}""".format(tbl_name))
profile = results.runtime_profile
- [min, max] = bytes_read_statistics(profile)
- if float(min) / float(max) < 0.5: cnt_fail += 1
- assert cnt_fail < 3
+ cnt_fail += count_intra_node_skew(profile)
+ assert cnt_fail <= 5
class TestHudiParquet(ImpalaTestSuite):
[impala] 03/03: IMPALA-11578: Exclude locality test for remote FS
Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 19114c7205318e8df158348a02695c378df1abe6
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Tue Sep 13 11:13:38 2022 -0700
IMPALA-11578: Exclude locality test for remote FS
Exclude test_scheduler_locality when the filesystem can only be remote.
Change-Id: Ie6198421f21bc2520773ecbb34ffaf65969ebc43
Reviewed-on: http://gerrit.cloudera.org:8080/18980
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
tests/common/skip.py | 4 ++++
tests/custom_cluster/test_scheduler_locality.py | 2 ++
2 files changed, 6 insertions(+)
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 8f380419f..055030a43 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -50,6 +50,10 @@ class SkipIfFS:
reason="HDFS encryption is not supported")
hdfs_block_size = pytest.mark.skipif(not IS_HDFS, reason="uses it's own block size")
hdfs_acls = pytest.mark.skipif(not IS_HDFS, reason="HDFS acls are not supported")
+ # TODO: IMPALA-11584: see if this can be collapsed into SkipIfNotHdfsMinicluster
+ always_remote = pytest.mark.skipif(IS_EC or not (IS_HDFS or IS_OZONE)
+ or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
+ reason="Only HDFS and Ozone tests are run co-located")
# Special case product limitations.
empty_directory = pytest.mark.skipif(IS_S3,
diff --git a/tests/custom_cluster/test_scheduler_locality.py b/tests/custom_cluster/test_scheduler_locality.py
index bccbcfa82..0ab1dc301 100644
--- a/tests/custom_cluster/test_scheduler_locality.py
+++ b/tests/custom_cluster/test_scheduler_locality.py
@@ -19,12 +19,14 @@
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.network import get_external_ip
+from tests.common.skip import SkipIfFS
LOCAL_ASSIGNMENTS_METRIC = "simple-scheduler.local-assignments.total"
TOTAL_ASSIGNMENTS_METRIC = "simple-scheduler.assignments.total"
+@SkipIfFS.always_remote
class TestSchedulerLocality(CustomClusterTestSuite):
"""Tests for local and remote disk scheduling."""
[impala] 01/03: IMPALA-11573: Certain methods used by the replanning feature can be improved
Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 17ee89f3db74fce1045ea9bd8e9f9fe727bea70e
Author: Qifan Chen <qc...@cloudera.com>
AuthorDate: Fri Sep 9 18:29:20 2022 -0400
IMPALA-11573: Certain methods used by the replanning feature can be improved
This patch improves certain methods used by the replan feature
(IMPALA-10992) so that they can be called by the external frontend
component in Hive. Specifically, the declaration of these methods
becomes public static, and the initialization of a static data
member checks whether the dependent object exists.
Testing:
1. Run unit tests;
2. Run "core" tests.
Change-Id: I334523f86e4292e9591306179eb1ab43be316c99
Reviewed-on: http://gerrit.cloudera.org:8080/18968
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Quanlong Huang <hu...@gmail.com>
---
.../main/java/org/apache/impala/service/Frontend.java | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index f2044377e..99e2ae7f3 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -231,7 +231,8 @@ public class Frontend {
// Maximum number of times to retry a query if it fails due to inconsistent metadata.
private static final int INCONSISTENT_METADATA_NUM_RETRIES =
- BackendConfig.INSTANCE.getLocalCatalogMaxFetchRetries();
+ (BackendConfig.INSTANCE != null) ?
+ BackendConfig.INSTANCE.getLocalCatalogMaxFetchRetries() : 0;
// Maximum number of threads used to check authorization for the user when executing
// show tables/databases.
@@ -1762,7 +1763,7 @@ public class Frontend {
*
* Also imposes the artificial two-executor groups for testing when needed.
*/
- private List<TExecutorGroupSet> setupThresholdsForExecutorGroupSets(
+ public static List<TExecutorGroupSet> setupThresholdsForExecutorGroupSets(
List<TExecutorGroupSet> executorGroupSets, String request_pool,
boolean default_executor_group, boolean test_replan) throws ImpalaException {
RequestPoolService poolService = RequestPoolService.getInstance();
@@ -1854,7 +1855,7 @@ public class Frontend {
// Only the following types of statements are considered auto scalable since each
// can be planned by the distributed planner utilizing the number of executors in
// an executor group as input.
- private boolean canStmtBeAutoScaled(TStmtType type) {
+ public static boolean canStmtBeAutoScaled(TStmtType type) {
return type == TStmtType.EXPLAIN || type == TStmtType.QUERY || type == TStmtType.DML;
}
@@ -1879,11 +1880,11 @@ public class Frontend {
default_executor_group = e.getExec_group_name_prefix() == null
|| e.getExec_group_name_prefix().isEmpty();
}
-
- List<TExecutorGroupSet> executorGroupSetsToUse = setupThresholdsForExecutorGroupSets(
- originalExecutorGroupSets, queryOptions.getRequest_pool(), default_executor_group,
- enable_replan
- && (RuntimeEnv.INSTANCE.isTestEnv() || queryOptions.isTest_replan()));
+ List<TExecutorGroupSet> executorGroupSetsToUse =
+ Frontend.setupThresholdsForExecutorGroupSets(originalExecutorGroupSets,
+ queryOptions.getRequest_pool(), default_executor_group,
+ enable_replan
+ && (RuntimeEnv.INSTANCE.isTestEnv() || queryOptions.isTest_replan()));
int num_executor_group_sets = executorGroupSetsToUse.size();
if (num_executor_group_sets == 0) {
@@ -1938,7 +1939,7 @@ public class Frontend {
} else if (!enable_replan) {
reason = "query option 'enable_replan' is false";
break;
- } else if (!canStmtBeAutoScaled(req.stmt_type)) {
+ } else if (!Frontend.canStmtBeAutoScaled(req.stmt_type)) {
reason = "query is not auto-scalable";
break;
}