You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/07/04 10:11:53 UTC
[spark] branch master updated: [SPARK-39081][PS][TEST][FOLLOWUP] Simplify test_resample

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ff074c6975f [SPARK-39081][PS][TEST][FOLLOWUP] Simplify test_resample
ff074c6975f is described below

commit ff074c6975f545635342ccf04b2b221986890e4b
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Mon Jul 4 19:11:29 2022 +0900

    [SPARK-39081][PS][TEST][FOLLOWUP] Simplify test_resample
    
    ### What changes were proposed in this pull request?
    test_resample may take 1 hour:
    ```
    Starting test(python3.9): pyspark.pandas.tests.test_resample (temp output: /tmp/python3.9__pyspark.pandas.tests.test_resample___plfrp2x.log)
    Finished test(python3.9): pyspark.pandas.tests.test_resample (3684s)
    ```
    
    this PR is to simplify it:
    1, reduce the size of dataset `pdf6`
    2, remove some parameter combinations
    3, increase the offset parameters to reduce the size of the output datasets, like `37S` -> `111S`
    
    ### Why are the changes needed?
    to accelerate the `pyspark-pandas` module tests
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Updated UT
    
    Closes #37070 from zhengruifeng/py_simplify_test_resample.
    
    Authored-by: Ruifeng Zheng <ru...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/tests/test_resample.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/pandas/tests/test_resample.py b/python/pyspark/pandas/tests/test_resample.py
index e9359b0a8a7..390b41fa302 100644
--- a/python/pyspark/pandas/tests/test_resample.py
+++ b/python/pyspark/pandas/tests/test_resample.py
@@ -94,7 +94,7 @@ class ResampleTest(PandasOnSparkTestCase, TestUtils):
     @property
     def pdf6(self):
         np.random.seed(55)
-        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-03 06:07:08", freq="1S")
+        index = pd.date_range(start="2022-05-02 03:04:05", end="2022-05-02 06:07:08", freq="1S")
         return pd.DataFrame(np.random.rand(len(index), 2), index=index, columns=list("AB"))
 
     @property
@@ -229,14 +229,14 @@ class ResampleTest(PandasOnSparkTestCase, TestUtils):
         self._test_resample(
             self.pdf1,
             self.psdf1,
-            ["Y", "3Y", "M", "9M", "D", "17D"],
+            ["3Y", "9M", "17D"],
             ["min", "max", "sum", "mean", "std", "var"],
         )
-        self._test_resample(self.pdf2, self.psdf2, ["3A", "A", "11M", "D"], ["sum"])
-        self._test_resample(self.pdf3, self.psdf3, ["27H", "1D", "2D", "1M"], ["sum"])
-        self._test_resample(self.pdf4, self.psdf4, ["1H", "5H", "D", "2D"], ["sum"])
-        self._test_resample(self.pdf5, self.psdf5, ["1T", "2T", "5MIN", "1H", "2H", "D"], ["sum"])
-        self._test_resample(self.pdf6, self.psdf6, ["1S", "2S", "1MIN", "H", "2H"], ["sum"])
+        self._test_resample(self.pdf2, self.psdf2, ["3A", "11M", "D"], ["sum"])
+        self._test_resample(self.pdf3, self.psdf3, ["2D", "1M"], ["sum"])
+        self._test_resample(self.pdf4, self.psdf4, ["1H", "2D"], ["sum"])
+        self._test_resample(self.pdf5, self.psdf5, ["11T", "55MIN", "2H", "D"], ["sum"])
+        self._test_resample(self.pdf6, self.psdf6, ["29S", "10MIN", "3H"], ["sum"])
 
     def test_series_resample(self):
         self._test_resample(self.pdf1.A, self.psdf1.A, ["4Y"], ["sum"])
@@ -244,7 +244,7 @@ class ResampleTest(PandasOnSparkTestCase, TestUtils):
         self._test_resample(self.pdf3.A, self.psdf3.A, ["18H"], ["sum"])
         self._test_resample(self.pdf4.A, self.psdf4.A, ["6D"], ["sum"])
         self._test_resample(self.pdf5.A, self.psdf5.A, ["47T"], ["sum"])
-        self._test_resample(self.pdf6.A, self.psdf6.A, ["37S"], ["sum"])
+        self._test_resample(self.pdf6.A, self.psdf6.A, ["111S"], ["sum"])
 
     def test_resample_on(self):
         np.random.seed(77)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org