You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/01 01:05:26 UTC

[spark] branch branch-3.4 updated: [SPARK-42253][PYTHON] Add test for detecting duplicated error class

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new fa0e1afa4d5 [SPARK-42253][PYTHON] Add test for detecting duplicated error class
fa0e1afa4d5 is described below

commit fa0e1afa4d5f435734b11672e4671a460138aa75
Author: itholic <ha...@databricks.com>
AuthorDate: Wed Feb 1 09:45:29 2023 +0900

    [SPARK-42253][PYTHON] Add test for detecting duplicated error class
    
    ### What changes were proposed in this pull request?
    
    This PR proposes adding a test that detects duplicated error class names, so that every error class name stays unique.
    
    ### Why are the changes needed?
    
    The name of each error class should be unique, so we should check whether any name is duplicated.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manually tested by duplicating `COLUMN_IN_LIST`; the new test then fails as shown below:
    ```shell
    ======================================================================
    FAIL [0.006s]: test_error_classes_duplicated (pyspark.errors.tests.test_errors.ErrorsTest)
    ----------------------------------------------------------------------
    Traceback (most recent call last):
    ...
    AssertionError: False is not true : Duplicate error class: COLUMN_IN_LIST
    
    ----------------------------------------------------------------------
    Ran 2 tests in 0.007s
    
    FAILED (failures=1)
    ```
    
    Closes #39821 from itholic/SPARK-42253.
    
    Lead-authored-by: itholic <ha...@databricks.com>
    Co-authored-by: Hyukjin Kwon <gu...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit 4d37e7816ce83dc5928b0d42a536304cb49eb0a3)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/errors/error_classes.py     |  5 -----
 python/pyspark/errors/tests/test_errors.py | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index b30b673974c..ab65600eb31 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -59,11 +59,6 @@ ERROR_CLASSES_JSON = """
       "Argument `<arg_name>` must be a DataFrame, got <arg_type>."
     ]
   },
-  "NOT_A_DATAFRAME" : {
-    "message" : [
-      "Argument `<arg_name>` should be a DataFrame, got <arg_type>."
-    ]
-  },
   "NOT_A_DICT" : {
     "message" : [
       "Argument `<arg_name>` should be a dict, got <arg_type>."
diff --git a/python/pyspark/errors/tests/test_errors.py b/python/pyspark/errors/tests/test_errors.py
index 833edcf9f42..4e743bfb9a0 100644
--- a/python/pyspark/errors/tests/test_errors.py
+++ b/python/pyspark/errors/tests/test_errors.py
@@ -16,13 +16,15 @@
 # limitations under the License.
 #
 
+import json
 import unittest
 
+from pyspark.errors.error_classes import ERROR_CLASSES_JSON
 from pyspark.errors.utils import ErrorClassesReader
 
 
 class ErrorsTest(unittest.TestCase):
-    def test_error_classes(self):
+    def test_error_classes_sorted(self):
         # Test error classes is sorted alphabetically
         error_reader = ErrorClassesReader()
         error_class_names = list(error_reader.error_info_map.keys())
@@ -33,6 +35,17 @@ class ErrorsTest(unittest.TestCase):
                 f"after [{error_class_names[i + 1]}]",
             )
 
+    def test_error_classes_duplicated(self):
+        # Test error classes is not duplicated
+        def detect_duplication(pairs):
+            error_classes_json = {}
+            for name, message in pairs:
+                self.assertTrue(name not in error_classes_json, f"Duplicate error class: {name}")
+                error_classes_json[name] = message
+            return error_classes_json
+
+        json.loads(ERROR_CLASSES_JSON, object_pairs_hook=detect_duplication)
+
 
 if __name__ == "__main__":
     import unittest


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org