You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2017/10/30 02:50:34 UTC
spark git commit: [SPARK-22379][PYTHON] Reduce duplication setUpClass
and tearDownClass in PySpark SQL tests
Repository: spark
Updated Branches:
refs/heads/master 1fe27612d -> 188b47e68
[SPARK-22379][PYTHON] Reduce duplication setUpClass and tearDownClass in PySpark SQL tests
## What changes were proposed in this pull request?
This PR proposes to add `ReusedSQLTestCase`, which deduplicates `setUpClass` and `tearDownClass` in `sql/tests.py`.
## How was this patch tested?
Jenkins tests and manual tests.
Author: hyukjinkwon <gu...@gmail.com>
Closes #19595 from HyukjinKwon/reduce-dupe.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/188b47e6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/188b47e6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/188b47e6
Branch: refs/heads/master
Commit: 188b47e68350731da775efccc2cda9c61610aa14
Parents: 1fe2761
Author: hyukjinkwon <gu...@gmail.com>
Authored: Mon Oct 30 11:50:22 2017 +0900
Committer: Takuya UESHIN <ue...@databricks.com>
Committed: Mon Oct 30 11:50:22 2017 +0900
----------------------------------------------------------------------
python/pyspark/sql/tests.py | 63 ++++++++++++++--------------------------
1 file changed, 21 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/188b47e6/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 8ed37c9..483f39a 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -179,6 +179,18 @@ class MyObject(object):
self.value = value
+class ReusedSQLTestCase(ReusedPySparkTestCase):
+ @classmethod
+ def setUpClass(cls):
+ ReusedPySparkTestCase.setUpClass()
+ cls.spark = SparkSession(cls.sc)
+
+ @classmethod
+ def tearDownClass(cls):
+ ReusedPySparkTestCase.tearDownClass()
+ cls.spark.stop()
+
+
class DataTypeTests(unittest.TestCase):
# regression test for SPARK-6055
def test_data_type_eq(self):
@@ -214,21 +226,19 @@ class DataTypeTests(unittest.TestCase):
self.assertRaises(TypeError, struct_field.typeName)
-class SQLTests(ReusedPySparkTestCase):
+class SQLTests(ReusedSQLTestCase):
@classmethod
def setUpClass(cls):
- ReusedPySparkTestCase.setUpClass()
+ ReusedSQLTestCase.setUpClass()
cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
os.unlink(cls.tempdir.name)
- cls.spark = SparkSession(cls.sc)
cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
cls.df = cls.spark.createDataFrame(cls.testData)
@classmethod
def tearDownClass(cls):
- ReusedPySparkTestCase.tearDownClass()
- cls.spark.stop()
+ ReusedSQLTestCase.tearDownClass()
shutil.rmtree(cls.tempdir.name, ignore_errors=True)
def test_sqlcontext_reuses_sparksession(self):
@@ -2623,17 +2633,7 @@ class HiveSparkSubmitTests(SparkSubmitTests):
self.assertTrue(os.path.exists(metastore_path))
-class SQLTests2(ReusedPySparkTestCase):
-
- @classmethod
- def setUpClass(cls):
- ReusedPySparkTestCase.setUpClass()
- cls.spark = SparkSession(cls.sc)
-
- @classmethod
- def tearDownClass(cls):
- ReusedPySparkTestCase.tearDownClass()
- cls.spark.stop()
+class SQLTests2(ReusedSQLTestCase):
# We can't include this test into SQLTests because we will stop class's SparkContext and cause
# other tests failed.
@@ -3082,12 +3082,12 @@ class DataTypeVerificationTests(unittest.TestCase):
@unittest.skipIf(not _have_arrow, "Arrow not installed")
-class ArrowTests(ReusedPySparkTestCase):
+class ArrowTests(ReusedSQLTestCase):
@classmethod
def setUpClass(cls):
from datetime import datetime
- ReusedPySparkTestCase.setUpClass()
+ ReusedSQLTestCase.setUpClass()
# Synchronize default timezone between Python and Java
cls.tz_prev = os.environ.get("TZ", None) # save current tz if set
@@ -3095,7 +3095,6 @@ class ArrowTests(ReusedPySparkTestCase):
os.environ["TZ"] = tz
time.tzset()
- cls.spark = SparkSession(cls.sc)
cls.spark.conf.set("spark.sql.session.timeZone", tz)
cls.spark.conf.set("spark.sql.execution.arrow.enabled", "true")
cls.schema = StructType([
@@ -3116,8 +3115,7 @@ class ArrowTests(ReusedPySparkTestCase):
if cls.tz_prev is not None:
os.environ["TZ"] = cls.tz_prev
time.tzset()
- ReusedPySparkTestCase.tearDownClass()
- cls.spark.stop()
+ ReusedSQLTestCase.tearDownClass()
def assertFramesEqual(self, df_with_arrow, df_without):
msg = ("DataFrame from Arrow is not equal" +
@@ -3169,17 +3167,7 @@ class ArrowTests(ReusedPySparkTestCase):
@unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
-class VectorizedUDFTests(ReusedPySparkTestCase):
-
- @classmethod
- def setUpClass(cls):
- ReusedPySparkTestCase.setUpClass()
- cls.spark = SparkSession(cls.sc)
-
- @classmethod
- def tearDownClass(cls):
- ReusedPySparkTestCase.tearDownClass()
- cls.spark.stop()
+class VectorizedUDFTests(ReusedSQLTestCase):
def test_vectorized_udf_basic(self):
from pyspark.sql.functions import pandas_udf, col
@@ -3498,16 +3486,7 @@ class VectorizedUDFTests(ReusedPySparkTestCase):
@unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
-class GroupbyApplyTests(ReusedPySparkTestCase):
- @classmethod
- def setUpClass(cls):
- ReusedPySparkTestCase.setUpClass()
- cls.spark = SparkSession(cls.sc)
-
- @classmethod
- def tearDownClass(cls):
- ReusedPySparkTestCase.tearDownClass()
- cls.spark.stop()
+class GroupbyApplyTests(ReusedSQLTestCase):
def assertFramesEqual(self, expected, result):
msg = ("DataFrames are not equal: " +
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org