Posted to reviews@spark.apache.org by "allisonwang-db (via GitHub)" <gi...@apache.org> on 2023/07/05 17:59:46 UTC

[GitHub] [spark] allisonwang-db commented on a diff in pull request #41606: [SPARK-44061][PYTHON] Add assertDFEqual util function

allisonwang-db commented on code in PR #41606:
URL: https://github.com/apache/spark/pull/41606#discussion_r1253451021


##########
python/pyspark/sql/tests/test_utils.py:
##########
@@ -16,18 +16,308 @@
 # limitations under the License.
 #
 
+import unittest
+from prettytable import PrettyTable
+
 from pyspark.sql.functions import sha2
 from pyspark.errors import (
     AnalysisException,
     ParseException,
+    PySparkAssertionError,
     IllegalArgumentException,
     SparkUpgradeException,
 )
-from pyspark.testing.sqlutils import ReusedSQLTestCase
+from pyspark.testing.utils import assertDataFrameEqual, blue, red
+from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, pandas_requirement_message
+import pyspark.sql.functions as F
 from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
+from pyspark.sql.types import StringType, ArrayType, LongType, StructType, MapType, StructField
+
+
+class UtilsTestsMixin:
+    def test_assert_equal_inttype(self):
+        df1 = self.spark.createDataFrame(
+            data=[
+                ("1", 1000),
+                ("2", 3000),
+            ],
+            schema=["id", "amount"],
+        )
+        df2 = self.spark.createDataFrame(
+            data=[
+                ("1", 1000),
+                ("2", 3000),
+            ],
+            schema=["id", "amount"],
+        )
+
+        assertDataFrameEqual(df1, df2)
+
+    def test_assert_equal_arraytype(self):
+        df1 = self.spark.createDataFrame(
+            data=[
+                ("john", ["Python", "Java"]),
+                ("jane", ["Scala", "SQL", "Java"]),
+            ],
+            schema=StructType(
+                [
+                    StructField("name", StringType(), True),
+                    StructField("languages", ArrayType(StringType()), True),
+                ]
+            ),
+        )
+        df2 = self.spark.createDataFrame(
+            data=[
+                ("john", ["Python", "Java"]),
+                ("jane", ["Scala", "SQL", "Java"]),
+            ],
+            schema=StructType(
+                [
+                    StructField("name", StringType(), True),
+                    StructField("languages", ArrayType(StringType()), True),
+                ]
+            ),
+        )
+
+        assertDataFrameEqual(df1, df2)
+
+    def test_assert_notequal_arraytype(self):
+        df1 = self.spark.createDataFrame(
+            data=[
+                ("John", ["Python", "Java"]),
+                ("Jane", ["Scala", "SQL", "Java"]),
+            ],
+            schema=StructType(
+                [
+                    StructField("name", StringType(), True),
+                    StructField("languages", ArrayType(StringType()), True),
+                ]
+            ),
+        )
+        df2 = self.spark.createDataFrame(
+            data=[
+                ("John", ["Python", "Java"]),
+                ("Jane", ["Scala", "Java"]),
+            ],
+            schema=StructType(
+                [
+                    StructField("name", StringType(), True),
+                    StructField("languages", ArrayType(StringType()), True),
+                ]
+            ),
+        )
+
+        expected_error_table = PrettyTable(["df", "expected"])
+        expected_error_table.add_row(
+            [red(df1.sort(df1.columns).collect()[0]), red(df2.sort(df2.columns).collect()[0])]
+        )
+        expected_error_table.add_row(
+            [blue(df1.sort(df1.columns).collect()[1]), blue(df2.sort(df2.columns).collect()[1])]

Review Comment:
   Super nit: we don't need to color the rows that already match
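   
   For illustration, a minimal sketch of what that could look like when building the
   error table (the `rows_df` / `rows_expected` names are hypothetical, not the PR's
   actual variables; `red` is the coloring helper this PR imports from
   pyspark.testing.utils, and PrettyTable is already used in the diff above):
   
       from prettytable import PrettyTable
       from pyspark.testing.utils import red  # coloring helper added in this PR
       
       error_table = PrettyTable(["df", "expected"])
       # rows_df / rows_expected are assumed to be the sorted, collected rows of
       # the actual and expected DataFrames (hypothetical names for this sketch).
       for r_df, r_expected in zip(rows_df, rows_expected):
           if r_df == r_expected:
               # Rows that already match are added uncolored, per the suggestion.
               error_table.add_row([r_df, r_expected])
           else:
               # Only rows that differ get highlighted.
               error_table.add_row([red(r_df), red(r_expected)])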



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

