You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jk...@apache.org on 2017/02/24 02:06:07 UTC
spark git commit: [SPARK-14772][PYTHON][ML] Fixed Params.copy method
to match Scala implementation
Repository: spark
Updated Branches:
refs/heads/master d02762457 -> 2f69e3f60
[SPARK-14772][PYTHON][ML] Fixed Params.copy method to match Scala implementation
## What changes were proposed in this pull request?
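Fixed the PySpark `Params.copy` method to behave like the Scala implementation. The main issue was that it did not handle `_defaultParamMap` separately: `_copyValues` took the merged result of `extractParamMap` and applied every value with `_set`, so default values ended up in the copy's explicitly set param map (`_paramMap`). Defaults are now copied into the copy's `_defaultParamMap`, and only explicitly set params (plus any `extra` values) are copied into its `_paramMap`.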
## How was this patch tested?
Added a new unit test to verify that the copy method correctly copies the uid, the explicitly set params (including those passed via `extra`), and the default params.
Author: Bryan Cutler <cu...@gmail.com>
Closes #16772 from BryanCutler/pyspark-ml-param_copy-Scala_sync-SPARK-14772.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f69e3f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f69e3f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f69e3f6
Branch: refs/heads/master
Commit: 2f69e3f60f27d4598f001a5454abc21f739120a6
Parents: d027624
Author: Bryan Cutler <cu...@gmail.com>
Authored: Thu Feb 23 18:05:58 2017 -0800
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Thu Feb 23 18:05:58 2017 -0800
----------------------------------------------------------------------
python/pyspark/ml/param/__init__.py | 17 +++++++++++------
python/pyspark/ml/tests.py | 16 ++++++++++++++++
2 files changed, 27 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2f69e3f6/python/pyspark/ml/param/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py
index dc3d23f..99d8fa3 100644
--- a/python/pyspark/ml/param/__init__.py
+++ b/python/pyspark/ml/param/__init__.py
@@ -372,6 +372,7 @@ class Params(Identifiable):
extra = dict()
that = copy.copy(self)
that._paramMap = {}
+ that._defaultParamMap = {}
return self._copyValues(that, extra)
def _shouldOwn(self, param):
@@ -452,12 +453,16 @@ class Params(Identifiable):
:param extra: extra params to be copied
:return: the target instance with param values copied
"""
- if extra is None:
- extra = dict()
- paramMap = self.extractParamMap(extra)
- for p in self.params:
- if p in paramMap and to.hasParam(p.name):
- to._set(**{p.name: paramMap[p]})
+ paramMap = self._paramMap.copy()
+ if extra is not None:
+ paramMap.update(extra)
+ for param in self.params:
+ # copy default params
+ if param in self._defaultParamMap and to.hasParam(param.name):
+ to._defaultParamMap[to.getParam(param.name)] = self._defaultParamMap[param]
+ # copy explicitly set params
+ if param in paramMap and to.hasParam(param.name):
+ to._set(**{param.name: paramMap[param]})
return to
def _resetUid(self, newUid):
http://git-wip-us.apache.org/repos/asf/spark/blob/2f69e3f6/python/pyspark/ml/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 53204cd..293c6c0 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -389,6 +389,22 @@ class ParamTests(PySparkTestCase):
# Check windowSize is set properly
self.assertEqual(model.getWindowSize(), 6)
+ def test_copy_param_extras(self):
+ tp = TestParams(seed=42)
+ extra = {tp.getParam(TestParams.inputCol.name): "copy_input"}
+ tp_copy = tp.copy(extra=extra)
+ self.assertEqual(tp.uid, tp_copy.uid)
+ self.assertEqual(tp.params, tp_copy.params)
+ for k, v in extra.items():
+ self.assertTrue(tp_copy.isDefined(k))
+ self.assertEqual(tp_copy.getOrDefault(k), v)
+ copied_no_extra = {}
+ for k, v in tp_copy._paramMap.items():
+ if k not in extra:
+ copied_no_extra[k] = v
+ self.assertEqual(tp._paramMap, copied_no_extra)
+ self.assertEqual(tp._defaultParamMap, tp_copy._defaultParamMap)
+
class EvaluatorTests(SparkSessionTestCase):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org