You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "mathewjacob1002 (via GitHub)" <gi...@apache.org> on 2023/07/12 21:19:34 UTC

[GitHub] [spark] mathewjacob1002 commented on a diff in pull request #41946: [SPARK-44264] FunctionPickler Class

mathewjacob1002 commented on code in PR #41946:
URL: https://github.com/apache/spark/pull/41946#discussion_r1261735506


##########
python/pyspark/ml/tests/test_util.py:
##########
@@ -15,63 +15,188 @@
 # limitations under the License.
 #
 
+from collections.abc import Iterable
+from contextlib import contextmanager
+import os
+from re import A
+import textwrap
+from typing import Iterator
+
 import unittest
 
+from pyspark import cloudpickle
 from pyspark.ml import Pipeline
 from pyspark.ml.classification import LogisticRegression, OneVsRest
 from pyspark.ml.feature import VectorAssembler
 from pyspark.ml.linalg import Vectors
-from pyspark.ml.util import MetaAlgorithmReadWrite
+from pyspark.ml.util import MetaAlgorithmReadWrite, FunctionPickler
 from pyspark.testing.mlutils import SparkSessionTestCase
 
-
 class MetaAlgorithmReadWriteTests(SparkSessionTestCase):
-    def test_getAllNestedStages(self):
-        def _check_uid_set_equal(stages, expected_stages):
-            uids = set(map(lambda x: x.uid, stages))
-            expected_uids = set(map(lambda x: x.uid, expected_stages))
-            self.assertEqual(uids, expected_uids)
-
-        df1 = self.spark.createDataFrame(
-            [
-                (Vectors.dense([1.0, 2.0]), 1.0),
-                (Vectors.dense([-1.0, -2.0]), 0.0),
-            ],
-            ["features", "label"],
-        )
-        df2 = self.spark.createDataFrame(
-            [
-                (1.0, 2.0, 1.0),
-                (1.0, 2.0, 0.0),
-            ],
-            ["a", "b", "label"],
-        )
-        vs = VectorAssembler(inputCols=["a", "b"], outputCol="features")
-        lr = LogisticRegression()
-        pipeline = Pipeline(stages=[vs, lr])
-        pipelineModel = pipeline.fit(df2)
-        ova = OneVsRest(classifier=lr)
-        ovaModel = ova.fit(df1)
-
-        ova_pipeline = Pipeline(stages=[vs, ova])
-        nested_pipeline = Pipeline(stages=[ova_pipeline])
-
-        _check_uid_set_equal(
-            MetaAlgorithmReadWrite.getAllNestedStages(pipeline), [pipeline, vs, lr]
-        )
-        _check_uid_set_equal(
-            MetaAlgorithmReadWrite.getAllNestedStages(pipelineModel),
-            [pipelineModel] + pipelineModel.stages,
-        )
-        _check_uid_set_equal(MetaAlgorithmReadWrite.getAllNestedStages(ova), [ova, lr])
-        _check_uid_set_equal(
-            MetaAlgorithmReadWrite.getAllNestedStages(ovaModel), [ovaModel, lr] + ovaModel.models
-        )
-        _check_uid_set_equal(
-            MetaAlgorithmReadWrite.getAllNestedStages(nested_pipeline),
-            [nested_pipeline, ova_pipeline, vs, ova, lr],
+     def test_getAllNestedStages(self):

Review Comment:
   Fixed the code so that this method no longer shows up as changed in the diff.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org