You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2022/12/06 21:21:01 UTC
[impala] 03/03: IMPALA-11584: Enable minicluster tests for Ozone

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8cd4a1e4e5cbf426294c1158936402bf21433f3c
Author: Michael Smith <mi...@cloudera.com>
AuthorDate: Wed Nov 16 11:56:20 2022 -0800

    IMPALA-11584: Enable minicluster tests for Ozone
    
    Enables tests guarded by SkipIfNotHdfsMinicluster to run on Ozone as
    well as HDFS. Plans are still skipped for Ozone because there's
    Ozone-specific text in the plan output.
    
    Updates explain output to allow for Ozone, which has a block size of
    256MB instead of 128MB. One of the partitions read in test_explain is
    ~180MB, which falls between the HDFS and Ozone block sizes, so the
    expected plan fragment now accepts either 1 or 2 hosts and instances.
    
    Testing: ran affected tests with Ozone.
    
    Change-Id: I6b06ceacf951dbc966aa409cf24a310c9676fe7f
    Reviewed-on: http://gerrit.cloudera.org:8080/19250
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
---
 .../queries/QueryTest/explain-level0.test          |  4 ++--
 .../queries/QueryTest/explain-level1.test          |  4 ++--
 .../queries/QueryTest/explain-level2.test          |  6 +++---
 .../queries/QueryTest/explain-level3.test          |  6 +++---
 .../QueryTest/mt-dop-parquet-scheduling.test       | 24 +++++++++++-----------
 tests/common/skip.py                               | 12 +++++------
 tests/custom_cluster/test_hdfs_timeout.py          |  4 +++-
 tests/custom_cluster/test_scheduler_locality.py    |  4 ++--
 tests/query_test/test_mem_usage_scaling.py         |  3 ++-
 tests/query_test/test_scanners.py                  |  1 +
 10 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
index 6ee4c0edb..aa80888dc 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test
@@ -12,8 +12,8 @@ row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.*
 '04:EXCHANGE [UNPARTITIONED]'
 '02:HASH JOIN [INNER JOIN, BROADCAST]'
 '|--03:EXCHANGE [BROADCAST]'
-'|  01:SCAN HDFS [tpch.orders]'
-'00:SCAN HDFS [tpch.lineitem]'
+'|  01:SCAN $FILESYSTEM_NAME [tpch.orders]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]'
 ====
 ---- QUERY
 # Tests the warning about missing table stats in the explain header.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
index 476dabc43..26d4aef9d 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
@@ -19,11 +19,11 @@ row_regex:.*row-size=.* cardinality=.*
 '|'
 '|--03:EXCHANGE [BROADCAST]'
 '|  |'
-'|  01:SCAN HDFS [tpch.orders]'
+'|  01:SCAN $FILESYSTEM_NAME [tpch.orders]'
 row_regex:.*partitions=1/1 files=1 size=.*
 row_regex:.*row-size=.* cardinality=.*
 '|'
-'00:SCAN HDFS [tpch.lineitem]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '   runtime filters: RF000 -> l_orderkey'
 row_regex:.*row-size=.* cardinality=.*
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
index 75444c03e..8b72f4a69 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
@@ -36,9 +36,9 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
 '|  |  tuple-ids=1 row-size=171B cardinality=1.50M'
 '|  |  in pipelines: 01(GETNEXT)'
 '|  |'
-'|  F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
+row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
 row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
-'|  01:SCAN HDFS [tpch.orders, RANDOM]'
+'|  01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '|     stored statistics:'
 row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
@@ -48,7 +48,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
 '|     tuple-ids=1 row-size=171B cardinality=1.50M'
 '|     in pipelines: 01(GETNEXT)'
 '|'
-'00:SCAN HDFS [tpch.lineitem, RANDOM]'
+'00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '   runtime filters: RF000[bloom] -> l_orderkey'
 '   stored statistics:'
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
index 1865c9c50..bf953b209 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
@@ -38,7 +38,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
 '  |     tuple-ids=1 row-size=171B cardinality=1.50M'
 '  |     in pipelines: 01(GETNEXT)'
 '  |'
-'  00:SCAN HDFS [tpch.lineitem, RANDOM]'
+'  00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '     runtime filters: RF000[bloom] -> l_orderkey'
 '     stored statistics:'
@@ -50,11 +50,11 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re
 '     tuple-ids=0 row-size=231B cardinality=6.00M'
 '     in pipelines: 00(GETNEXT)'
 ''
-'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2'
+row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2]
 row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.*
 '  DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]'
 row_regex:.*  |  mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0
-'  01:SCAN HDFS [tpch.orders, RANDOM]'
+'  01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]'
 row_regex:.*partitions=1/1 files=1 size=.*
 '     stored statistics:'
 row_regex:.*table: rows=[0-9.]*[A-Z]* size=.*
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
index 800fb67fe..3b382015f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
@@ -44,9 +44,9 @@ row_regex:.*AdmissionSlots: 4 .*
 row_regex:.*F04:ROOT * 1 * 1 .*
 row_regex:.*04:AGGREGATE * 3 * 12 .*
 row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.*
-row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypestiny.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
+row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
 ====
 ---- QUERY
 # Same idea, but with smallest scan first to check that the scheduler is taking the
@@ -64,9 +64,9 @@ row_regex:.*AdmissionSlots: 4 .*
 row_regex:.*F04:ROOT * 1 * 1 .*
 row_regex:.*04:AGGREGATE * 3 * 12 .*
 row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.*
-row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypes.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
+row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
 ====
 ---- QUERY
 # This query should have one scan and one exchange in the interior fragment.
@@ -85,8 +85,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .*
 row_regex:.*06:AGGREGATE * 3 * 12 .*
 row_regex:.*03:AGGREGATE * 3 * 12 .*
 row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypes.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
 ====
 ---- QUERY
 # This query should have one scan and one exchange in the interior fragment.
@@ -107,8 +107,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .*
 row_regex:.*06:AGGREGATE * 3 * 12 .*
 row_regex:.*03:AGGREGATE * 3 * 4 .*
 row_regex:.*00:UNION * 3 * 12 *
-row_regex:.*02:SCAN HDFS * 3 * 4 .*alltypestiny.*
-row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.*
+row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 4 .*alltypestiny.*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
 ====
 ---- QUERY
 # This query should have one scan and two exchanges in the interior fragment.
@@ -128,6 +128,6 @@ row_regex:.*AdmissionSlots: 2.*
 row_regex:.*00:UNION * 3 * 6 .*
 row_regex:.*08:AGGREGATE * 3 * 6 .*
 row_regex:.*03:AGGREGATE * 3 * 6 .*
-row_regex:.*04:SCAN HDFS * 3 * 6 .*
-row_regex:.*01:SCAN HDFS * 3 * 6 .*
+row_regex:.*04:SCAN (HDFS|OZONE) * 3 * 6 .*
+row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 6 .*
 ====
diff --git a/tests/common/skip.py b/tests/common/skip.py
index e5b856485..46fc303fa 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -55,10 +55,6 @@ class SkipIfFS:
   hdfs_block_size = pytest.mark.skipif(not IS_HDFS,
       reason="Size of block reported to Impala is not ~128MB")
   hdfs_acls = pytest.mark.skipif(not IS_HDFS, reason="HDFS acls are not supported")
-  # TODO: IMPALA-11584: see if this can be collapsed into SkipIfNotHdfsMinicluster
-  always_remote = pytest.mark.skipif(IS_EC or not (IS_HDFS or IS_OZONE)
-      or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
-      reason="Only HDFS and Ozone tests are run co-located")
 
   # Special case product limitations.
   empty_directory = pytest.mark.skipif(IS_S3,
@@ -73,6 +69,8 @@ class SkipIfFS:
   read_past_eof = pytest.mark.skipif(IS_S3 or IS_GCS, reason="IMPALA-2512")
   large_block_size = pytest.mark.skipif(IS_OZONE or IS_EC,
       reason="block size is larger than 128MB")
+  read_speed_dependent = pytest.mark.skipif(not IS_HDFS or IS_EC,
+      reason="success depends on fast scan node performance")
 
   # These need test infra work to re-enable.
   hive = pytest.mark.skipif(not IS_HDFS, reason="Hive doesn't work")
@@ -133,10 +131,10 @@ class SkipIfLocal:
 class SkipIfNotHdfsMinicluster:
   # These are skipped when not running against a local HDFS mini-cluster.
   plans = pytest.mark.skipif(
-      not IS_HDFS or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
+      not (IS_HDFS or IS_OZONE) or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
       reason="Test assumes plans from local HDFS mini-cluster")
-  tuned_for_minicluster = pytest.mark.skipif(
-      not IS_HDFS or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
+  tuned_for_minicluster = pytest.mark.skipif(not (IS_HDFS or IS_OZONE)
+      or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(),
       reason="Test is tuned for 3-node HDFS minicluster with no EC")
   scheduling = pytest.mark.skipif(
       not (IS_HDFS or IS_OZONE) or IS_EC or pytest.config.option.testing_remote_cluster,
diff --git a/tests/custom_cluster/test_hdfs_timeout.py b/tests/custom_cluster/test_hdfs_timeout.py
index 0967427f7..9e9b84a96 100644
--- a/tests/custom_cluster/test_hdfs_timeout.py
+++ b/tests/custom_cluster/test_hdfs_timeout.py
@@ -22,6 +22,7 @@ import time
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIfNotHdfsMinicluster
 from subprocess import check_call
+from tests.util.filesystem_utils import IS_OZONE
 from tests.util.shell_util import exec_process
 
 
@@ -43,7 +44,8 @@ class TestHdfsTimeouts(CustomClusterTestSuite):
 
     # Find the NameNode's pid via pgrep. This would raise an error if it did not
     # find a pid, so there is at least one match.
-    rc, pgrep_output, stderr = exec_process("pgrep -f namenode.NameNode")
+    data_api_name = 'OzoneManager' if IS_OZONE else 'namenode.NameNode'
+    rc, pgrep_output, stderr = exec_process("pgrep -f {}".format(data_api_name))
     assert rc == 0, \
         "Error finding NameNode pid\nstdout={0}\nstderr={1}".format(pgrep_output, stderr)
     # In our test environment, this should only match one pid
diff --git a/tests/custom_cluster/test_scheduler_locality.py b/tests/custom_cluster/test_scheduler_locality.py
index 0ab1dc301..adfbcd581 100644
--- a/tests/custom_cluster/test_scheduler_locality.py
+++ b/tests/custom_cluster/test_scheduler_locality.py
@@ -19,14 +19,14 @@
 
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.network import get_external_ip
-from tests.common.skip import SkipIfFS
+from tests.common.skip import SkipIfNotHdfsMinicluster
 
 
 LOCAL_ASSIGNMENTS_METRIC = "simple-scheduler.local-assignments.total"
 TOTAL_ASSIGNMENTS_METRIC = "simple-scheduler.assignments.total"
 
 
-@SkipIfFS.always_remote
+@SkipIfNotHdfsMinicluster.tuned_for_minicluster
 class TestSchedulerLocality(CustomClusterTestSuite):
   """Tests for local and remote disk scheduling."""
 
diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py
index ceb1e8498..8910aa336 100644
--- a/tests/query_test/test_mem_usage_scaling.py
+++ b/tests/query_test/test_mem_usage_scaling.py
@@ -23,7 +23,7 @@ from tests.common.test_dimensions import (create_avro_snappy_dimension,
     create_parquet_dimension)
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfNotHdfsMinicluster
+from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfFS
 from tests.common.test_dimensions import create_single_exec_option_dimension
 from tests.common.test_vector import ImpalaTestDimension
 from tests.verifiers.metric_verifier import MetricVerifier
@@ -400,6 +400,7 @@ class TestHashJoinMemLimit(ImpalaTestSuite):
 
 
 @SkipIfNotHdfsMinicluster.tuned_for_minicluster
+@SkipIfFS.read_speed_dependent
 class TestExchangeMemUsage(ImpalaTestSuite):
   """Targeted test for exchange memory limits."""
 
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 22dca16f4..76ed7a254 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -1125,6 +1125,7 @@ class TestParquet(ImpalaTestSuite):
       for summary in page_size_summaries:
         assert not self._is_summary_stats_counter_empty(summary)
 
+  @SkipIfFS.hdfs_small_block
   @SkipIfNotHdfsMinicluster.tuned_for_minicluster
   def test_bytes_read_per_column(self, vector):
     """IMPALA-6964: Test that the counter Parquet[Un]compressedBytesReadPerColumn is