You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2022/12/06 21:21:01 UTC
[impala] 03/03: IMPALA-11584: Enable minicluster tests for Ozone
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8cd4a1e4e5cbf426294c1158936402bf21433f3c
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Wed Nov 16 11:56:20 2022 -0800
IMPALA-11584: Enable minicluster tests for Ozone
Enables tests guarded by SkipIfNotHdfsMinicluster to run on Ozone as
well as HDFS. Plans are still skipped for Ozone because there's
Ozone-specific text in the plan output.
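Updates explain output to allow for Ozone, which has a block size of
256MB instead of 128MB. One of the partitions read in test_explain is
~180MB, straddling the difference between Ozone and HDFS: it spans two
blocks on HDFS but fits in a single block on Ozone, so the expected
fragment header now accepts hosts and instances of either 1 or 2.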
Testing: ran affected tests with Ozone.
Change-Id: I6b06ceacf951dbc966aa409cf24a310c9676fe7f
Reviewed-on: http://gerrit.cloudera.org:8080/19250
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
---
.../queries/QueryTest/explain-level0.test | 4 ++--
.../queries/QueryTest/explain-level1.test | 4 ++--
.../queries/QueryTest/explain-level2.test | 6 +++---
.../queries/QueryTest/explain-level3.test | 6 +++---
.../QueryTest/mt-dop-parquet-scheduling.test | 24 +++++++++++-----------
tests/common/skip.py | 12 +++++------
tests/custom_cluster/test_hdfs_timeout.py | 4 +++-
tests/custom_cluster/test_scheduler_locality.py | 4 ++--
tests/query_test/test_mem_usage_scaling.py | 3 ++-
tests/query_test/test_scanners.py | 1 +
10 files changed, 35 insertions(+), 33 deletions(-)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
index 6ee4c0edb..aa80888dc 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
@@ -12,8 +12,8 @@ row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
'04:EXCHANGE [UNPARTITIONED]'
'02:HASH JOIN [INNER JOIN, BROADCAST]'
'|--03:EXCHANGE [BROADCAST]'
-'| 01:SCAN HDFS [tpch.orders]'
-'00:SCAN HDFS [tpch.lineitem]'
+'| 01:SCAN $FILESYSTEM_NAME [tpch.orders]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]'
====
---- QUERY
# Tests the warning about missing table stats in the explain header.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
index 476dabc43..26d4aef9d 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
@@ -19,11 +19,11 @@ row_regex:.*row-size=.* cardinality=.*
'|'
'|--03:EXCHANGE [BROADCAST]'
'| |'
-'| 01:SCAN HDFS [tpch.orders]'
+'| 01:SCAN $FILESYSTEM_NAME [tpch.orders]'
row_regex:.*partitions=1/1 files=1 size=.*
row_regex:.*row-size=.* cardinality=.*
'|'
-'00:SCAN HDFS [tpch.lineitem]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000 -> l_orderkey'
row_regex:.*row-size=.* cardinality=.*
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
index 75444c03e..8b72f4a69 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
@@ -36,9 +36,9 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
'| | tuple-ids=1 row-size=171B cardinality=1.50M'
'| | in pipelines: 01(GETNEXT)'
'| |'
-'| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
+row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
-'| 01:SCAN HDFS [tpch.orders, RANDOM]'
+'| 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
'| stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
@@ -48,7 +48,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
'| tuple-ids=1 row-size=171B cardinality=1.50M'
'| in pipelines: 01(GETNEXT)'
'|'
-'00:SCAN HDFS [tpch.lineitem, RANDOM]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
index 1865c9c50..bf953b209 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
@@ -38,7 +38,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
' | tuple-ids=1 row-size=171B cardinality=1.50M'
' | in pipelines: 01(GETNEXT)'
' |'
-' 00:SCAN HDFS [tpch.lineitem, RANDOM]'
+' 00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' runtime filters: RF000[bloom] -> l_orderkey'
' stored statistics:'
@@ -50,11 +50,11 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
' tuple-ids=0 row-size=231B cardinality=6.00M'
' in pipelines: 00(GETNEXT)'
''
-'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
+row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
-' 01:SCAN HDFS [tpch.orders, RANDOM]'
+' 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
row_regex:.*partitions=1/1 files=1 size=.*
' stored statistics:'
row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
index 800fb67fe..3b382015f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
@@ -44,9 +44,9 @@ row_regex:.*AdmissionSlots: 4 .*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.*
-row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypestiny.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
+row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
====
---- QUERY
# Same idea, but with smallest scan first to check that the scheduler is taking the
@@ -64,9 +64,9 @@ row_regex:.*AdmissionSlots: 4 .*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.*
-row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypes.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
+row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
====
---- QUERY
# This query should have one scan and one exchange in the interior fragment.
@@ -85,8 +85,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*06:AGGREGATE * 3 * 12 .*
row_regex:.*03:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypes.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
====
---- QUERY
# This query should have one scan and one exchange in the interior fragment.
@@ -107,8 +107,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*06:AGGREGATE * 3 * 12 .*
row_regex:.*03:AGGREGATE * 3 * 4 .*
row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 4 .*alltypestiny.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 4 .*alltypestiny.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
====
---- QUERY
# This query should have one scan and two exchanges in the interior fragment.
@@ -128,6 +128,6 @@ row_regex:.*AdmissionSlots: 2.*
row_regex:.*00:UNION * 3 * 6 .*
row_regex:.*08:AGGREGATE * 3 * 6 .*
row_regex:.*03:AGGREGATE * 3 * 6 .*
-row_regex:.*04:SCAN HDFS * 3 * 6 .*
-row_regex:.*01:SCAN HDFS * 3 * 6 .*
+row_regex:.*04:SCAN (HDFS|OZONE) * 3 * 6 .*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 6 .*
====
diff --git a/tests/common/skip.py b/tests/common/skip.py
index e5b856485..46fc303fa 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -55,10 +55,6 @@ class SkipIfFS:
hdfs_block_size = pytest.mark.skipif(not IS_HDFS,
reason="Size of block reported to Impala is not ~128MB")
hdfs_acls = pytest.mark.skipif(not IS_HDFS, reason="HDFS acls are not supported")
- # TODO: IMPALA-11584: see if this can be collapsed into SkipIfNotHdfsMinicluster
- always_remote = pytest.mark.skipif(IS_EC or not (IS_HDFS or IS_OZONE)
- or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
- reason="Only HDFS and Ozone tests are run co-located")
# Special case product limitations.
empty_directory = pytest.mark.skipif(IS_S3,
@@ -73,6 +69,8 @@ class SkipIfFS:
read_past_eof = pytest.mark.skipif(IS_S3 or IS_GCS, reason="IMPALA-2512")
large_block_size = pytest.mark.skipif(IS_OZONE or IS_EC,
reason="block size is larger than 128MB")
+ read_speed_dependent = pytest.mark.skipif(not IS_HDFS or IS_EC,
+ reason="success depends on fast scan node performance")
# These need test infra work to re-enable.
hive = pytest.mark.skipif(not IS_HDFS, reason="Hive doesn't work")
@@ -133,10 +131,10 @@ class SkipIfLocal:
class SkipIfNotHdfsMinicluster:
# These are skipped when not running against a local HDFS mini-cluster.
plans = pytest.mark.skipif(
- not IS_HDFS or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
+ not (IS_HDFS or IS_OZONE) or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
reason="Test assumes plans from local HDFS mini-cluster")
- tuned_for_minicluster = pytest.mark.skipif(
- not IS_HDFS or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
+ tuned_for_minicluster = pytest.mark.skipif(not (IS_HDFS or IS_OZONE)
+ or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
reason="Test is tuned for 3-node HDFS minicluster with no EC")
scheduling = pytest.mark.skipif(
not (IS_HDFS or IS_OZONE) or IS_EC or pytest.config.option.testing_remote_cluster,
diff --git a/tests/custom_cluster/test_hdfs_timeout.py b/tests/custom_cluster/test_hdfs_timeout.py
index 0967427f7..9e9b84a96 100644
--- a/tests/custom_cluster/test_hdfs_timeout.py
+++ b/tests/custom_cluster/test_hdfs_timeout.py
@@ -22,6 +22,7 @@ import time
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.skip import SkipIfNotHdfsMinicluster
from subprocess import check_call
+from tests.util.filesystem_utils import IS_OZONE
from tests.util.shell_util import exec_process
@@ -43,7 +44,8 @@ class TestHdfsTimeouts(CustomClusterTestSuite):
# Find the NameNode's pid via pgrep. This would raise an error if it did not
# find a pid, so there is at least one match.
- rc, pgrep_output, stderr = exec_process("pgrep -f namenode.NameNode")
+ data_api_name = 'OzoneManager' if IS_OZONE else 'namenode.NameNode'
+ rc, pgrep_output, stderr = exec_process("pgrep -f {}".format(data_api_name))
assert rc == 0, \
"Error finding NameNode pid\nstdout={0}\nstderr={1}".format(pgrep_output, stderr)
# In our test environment, this should only match one pid
diff --git a/tests/custom_cluster/test_scheduler_locality.py b/tests/custom_cluster/test_scheduler_locality.py
index 0ab1dc301..adfbcd581 100644
--- a/tests/custom_cluster/test_scheduler_locality.py
+++ b/tests/custom_cluster/test_scheduler_locality.py
@@ -19,14 +19,14 @@
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.network import get_external_ip
-from tests.common.skip import SkipIfFS
+from tests.common.skip import SkipIfNotHdfsMinicluster
LOCAL_ASSIGNMENTS_METRIC = "simple-scheduler.local-assignments.total"
TOTAL_ASSIGNMENTS_METRIC = "simple-scheduler.assignments.total"
-@SkipIfFS.always_remote
+@SkipIfNotHdfsMinicluster.tuned_for_minicluster
class TestSchedulerLocality(CustomClusterTestSuite):
"""Tests for local and remote disk scheduling."""
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py
index ceb1e8498..8910aa336 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -23,7 +23,7 @@ from tests.common.test_dimensions import (create_avro_snappy_dimension,
create_parquet_dimension)
from tests.common.impala_cluster import ImpalaCluster
from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfNotHdfsMinicluster
+from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfFS
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension
from tests.verifiers.metric_verifier import MetricVerifier
@@ -400,6 +400,7 @@ class TestHashJoinMemLimit(ImpalaTestSuite):
@SkipIfNotHdfsMinicluster.tuned_for_minicluster
+@SkipIfFS.read_speed_dependent
class TestExchangeMemUsage(ImpalaTestSuite):
"""Targeted test for exchange memory limits."""
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 22dca16f4..76ed7a254 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -1125,6 +1125,7 @@ class TestParquet(ImpalaTestSuite):
for summary in page_size_summaries:
assert not self._is_summary_stats_counter_empty(summary)
+ @SkipIfFS.hdfs_small_block
@SkipIfNotHdfsMinicluster.tuned_for_minicluster
def test_bytes_read_per_column(self, vector):
"""IMPALA-6964: Test that the counter Parquet[Un]compressedBytesReadPerColumn is