Posted to commits@spark.apache.org by me...@apache.org on 2015/06/30 19:27:31 UTC

spark git commit: [SPARK-8679] [PYSPARK] [MLLIB] Default values in Pipeline API should be immutable

Repository: spark
Updated Branches:
  refs/heads/master 45281664e -> 5fa086362


[SPARK-8679] [PYSPARK] [MLLIB] Default values in Pipeline API should be immutable

It is dangerous to use a mutable object as a default parameter value: the default is evaluated once, at function definition time, and is then shared across all calls. (http://stackoverflow.com/a/11416002/1170730)

e.g.

    >>> def func(example, f={}):
    ...     f[example] = 1
    ...     return f
    >>> func(2)
    {2: 1}
    >>> func(3)
    {2: 1, 3: 1}
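
For comparison, a minimal sketch of the same toy function rewritten with the None-sentinel pattern that this patch applies to fit, transform, copy, __init__, and setParams (func here is just the illustrative example above, not code from the patch):

    >>> def func(example, f=None):
    ...     # replace the mutable default with a None sentinel so that a
    ...     # fresh dict is created on every call instead of being shared
    ...     if f is None:
    ...         f = {}
    ...     f[example] = 1
    ...     return f
    >>> func(2)
    {2: 1}
    >>> func(3)
    {3: 1}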

mengxr

Author: MechCoder <ma...@gmail.com>

Closes #7058 from MechCoder/pipeline_api_playground and squashes the following commits:

40a5eb2 [MechCoder] copy
95f7ff2 [MechCoder] [SPARK-8679] [PySpark] [MLlib] Default values in Pipeline API should be immutable


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fa08636
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fa08636
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fa08636

Branch: refs/heads/master
Commit: 5fa0863626aaf5a9a41756a0b1ec82bddccbf067
Parents: 4528166
Author: MechCoder <ma...@gmail.com>
Authored: Tue Jun 30 10:27:29 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Jun 30 10:27:29 2015 -0700

----------------------------------------------------------------------
 python/pyspark/ml/pipeline.py | 24 ++++++++++++++++++------
 python/pyspark/ml/wrapper.py  |  4 +++-
 2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5fa08636/python/pyspark/ml/pipeline.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index a563024..9889f56 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -42,7 +42,7 @@ class Estimator(Params):
         """
         raise NotImplementedError()
 
-    def fit(self, dataset, params={}):
+    def fit(self, dataset, params=None):
         """
         Fits a model to the input dataset with optional parameters.
 
@@ -54,6 +54,8 @@ class Estimator(Params):
                        list of models.
         :returns: fitted model(s)
         """
+        if params is None:
+            params = dict()
         if isinstance(params, (list, tuple)):
             return [self.fit(dataset, paramMap) for paramMap in params]
         elif isinstance(params, dict):
@@ -86,7 +88,7 @@ class Transformer(Params):
         """
         raise NotImplementedError()
 
-    def transform(self, dataset, params={}):
+    def transform(self, dataset, params=None):
         """
         Transforms the input dataset with optional parameters.
 
@@ -96,6 +98,8 @@ class Transformer(Params):
                        params.
         :returns: transformed dataset
         """
+        if params is None:
+            params = dict()
         if isinstance(params, dict):
             if params:
                 return self.copy(params,)._transform(dataset)
@@ -135,10 +139,12 @@ class Pipeline(Estimator):
     """
 
     @keyword_only
-    def __init__(self, stages=[]):
+    def __init__(self, stages=None):
         """
         __init__(self, stages=[])
         """
+        if stages is None:
+            stages = []
         super(Pipeline, self).__init__()
         #: Param for pipeline stages.
         self.stages = Param(self, "stages", "pipeline stages")
@@ -162,11 +168,13 @@ class Pipeline(Estimator):
             return self._paramMap[self.stages]
 
     @keyword_only
-    def setParams(self, stages=[]):
+    def setParams(self, stages=None):
         """
         setParams(self, stages=[])
         Sets params for Pipeline.
         """
+        if stages is None:
+            stages = []
         kwargs = self.setParams._input_kwargs
         return self._set(**kwargs)
 
@@ -195,7 +203,9 @@ class Pipeline(Estimator):
                 transformers.append(stage)
         return PipelineModel(transformers)
 
-    def copy(self, extra={}):
+    def copy(self, extra=None):
+        if extra is None:
+            extra = dict()
         that = Params.copy(self, extra)
         stages = [stage.copy(extra) for stage in that.getStages()]
         return that.setStages(stages)
@@ -216,6 +226,8 @@ class PipelineModel(Model):
             dataset = t.transform(dataset)
         return dataset
 
-    def copy(self, extra={}):
+    def copy(self, extra=None):
+        if extra is None:
+            extra = dict()
         stages = [stage.copy(extra) for stage in self.stages]
         return PipelineModel(stages)

http://git-wip-us.apache.org/repos/asf/spark/blob/5fa08636/python/pyspark/ml/wrapper.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py
index 7b0893e..253705b 100644
--- a/python/pyspark/ml/wrapper.py
+++ b/python/pyspark/ml/wrapper.py
@@ -166,7 +166,7 @@ class JavaModel(Model, JavaTransformer):
         self._java_obj = java_model
         self.uid = java_model.uid()
 
-    def copy(self, extra={}):
+    def copy(self, extra=None):
         """
         Creates a copy of this instance with the same uid and some
         extra params. This implementation first calls Params.copy and
@@ -175,6 +175,8 @@ class JavaModel(Model, JavaTransformer):
         :param extra: Extra parameters to copy to the new instance
         :return: Copy of this instance
         """
+        if extra is None:
+            extra = dict()
         that = super(JavaModel, self).copy(extra)
         that._java_obj = self._java_obj.copy(self._empty_java_param_map())
         that._transfer_params_to_java()

