You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/04/13 07:17:19 UTC
spark git commit: [SPARK-6643][MLLIB] Implement StandardScalerModel missing methods

Repository: spark
Updated Branches:
  refs/heads/master a1fe59dae -> fc1766147


[SPARK-6643][MLLIB] Implement StandardScalerModel missing methods

This is the sub-task of SPARK-6254.
Wrap missing method for `StandardScalerModel`.

Author: lewuathe <le...@me.com>

Closes #5310 from Lewuathe/SPARK-6643 and squashes the following commits:

fafd690 [lewuathe] Fix for lint-python
bd31a64 [lewuathe] Merge branch 'master' into SPARK-6643
578f5ee [lewuathe] Remove unnecessary class
a38f155 [lewuathe] Merge master
66bb2ab [lewuathe] Fix typos
82683a0 [lewuathe] [SPARK-6643] Implement StandardScalerModel missing methods


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc176614
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc176614
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc176614

Branch: refs/heads/master
Commit: fc17661475443d9f0a8d28e3439feeb7a7bca67b
Parents: a1fe59d
Author: lewuathe <le...@me.com>
Authored: Sun Apr 12 22:17:16 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Sun Apr 12 22:17:16 2015 -0700

----------------------------------------------------------------------
 .../spark/mllib/api/python/PythonMLLibAPI.scala |  4 ++--
 python/pyspark/mllib/feature.py                 | 16 +++++++++++++
 python/pyspark/mllib/tests.py                   | 24 ++++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fc176614/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 1faa3de..ab15f0f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -450,9 +450,9 @@ private[python] class PythonMLLibAPI extends Serializable {
   def normalizeVector(p: Double, rdd: JavaRDD[Vector]): JavaRDD[Vector] = {
     new Normalizer(p).transform(rdd)
   }
-
+  
   /**
-   * Java stub for IDF.fit(). This stub returns a
+   * Java stub for StandardScaler.fit(). This stub returns a
    * handle to the Java object instead of the content of the Java object.
    * Extra care needs to be taken in the Python code to ensure it gets freed on
    * exit; see the Py4J documentation.

http://git-wip-us.apache.org/repos/asf/spark/blob/fc176614/python/pyspark/mllib/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 3cda120..8be819a 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -132,6 +132,22 @@ class StandardScalerModel(JavaVectorTransformer):
         """
         return JavaVectorTransformer.transform(self, vector)
 
+    def setWithMean(self, withMean):
+        """
+        Setter of the boolean which decides
+        whether it uses mean or not
+        """
+        self.call("setWithMean", withMean)
+        return self
+
+    def setWithStd(self, withStd):
+        """
+        Setter of the boolean which decides
+        whether it uses std or not
+        """
+        self.call("setWithStd", withStd)
+        return self
+
 
 class StandardScaler(object):
     """

http://git-wip-us.apache.org/repos/asf/spark/blob/fc176614/python/pyspark/mllib/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py
index 3b40158..8eaddcf 100644
--- a/python/pyspark/mllib/tests.py
+++ b/python/pyspark/mllib/tests.py
@@ -44,6 +44,7 @@ from pyspark.mllib.random import RandomRDDs
 from pyspark.mllib.stat import Statistics
 from pyspark.mllib.feature import Word2Vec
 from pyspark.mllib.feature import IDF
+from pyspark.mllib.feature import StandardScaler
 from pyspark.serializers import PickleSerializer
 from pyspark.sql import SQLContext
 from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase
@@ -745,6 +746,29 @@ class Word2VecTests(PySparkTestCase):
         model = Word2Vec().fit(self.sc.parallelize(data))
         self.assertEquals(len(model.getVectors()), 3)
 
+
+class StandardScalerTests(PySparkTestCase):
+    def test_model_setters(self):
+        data = [
+            [1.0, 2.0, 3.0],
+            [2.0, 3.0, 4.0],
+            [3.0, 4.0, 5.0]
+        ]
+        model = StandardScaler().fit(self.sc.parallelize(data))
+        self.assertIsNotNone(model.setWithMean(True))
+        self.assertIsNotNone(model.setWithStd(True))
+        self.assertEqual(model.transform([1.0, 2.0, 3.0]), DenseVector([-1.0, -1.0, -1.0]))
+
+    def test_model_transform(self):
+        data = [
+            [1.0, 2.0, 3.0],
+            [2.0, 3.0, 4.0],
+            [3.0, 4.0, 5.0]
+        ]
+        model = StandardScaler().fit(self.sc.parallelize(data))
+        self.assertEqual(model.transform([1.0, 2.0, 3.0]), DenseVector([1.0, 2.0, 3.0]))
+
+
 if __name__ == "__main__":
     if not _have_scipy:
         print "NOTE: Skipping SciPy tests as it does not seem to be installed"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org