You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/07/07 21:35:43 UTC

spark git commit: [SPARK-8704] [ML] [PySpark] Add missing methods in StandardScaler

Repository: spark
Updated Branches:
  refs/heads/master 3336c7b14 -> 35d781e71


[SPARK-8704] [ML] [PySpark] Add missing methods in StandardScaler

Add std and mean to StandardScalerModel,
getVectors and findSynonyms to Word2VecModel,
and setFeatures and getFeatures to HashingTF.

Author: MechCoder <ma...@gmail.com>

Closes #7086 from MechCoder/missing_model_methods and squashes the following commits:

9fbae90 [MechCoder] Add type
6e3d6b2 [MechCoder] [SPARK-8704] Add missing methods in StandardScaler (ML and PySpark)


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/35d781e7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/35d781e7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/35d781e7

Branch: refs/heads/master
Commit: 35d781e71b68eb6da7f49fdae40fa6c4f8e27060
Parents: 3336c7b
Author: MechCoder <ma...@gmail.com>
Authored: Tue Jul 7 12:35:40 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Jul 7 12:35:40 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/ml/feature/StandardScaler.scala  |  6 ++++++
 python/pyspark/ml/feature.py                      | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/35d781e7/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index ca3c1cf..72b545e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -106,6 +106,12 @@ class StandardScalerModel private[ml] (
     scaler: feature.StandardScalerModel)
   extends Model[StandardScalerModel] with StandardScalerParams {
 
+  /** Standard deviation of the StandardScalerModel */
+  val std: Vector = scaler.std
+
+  /** Mean of the StandardScalerModel */
+  val mean: Vector = scaler.mean
+
   /** @group setParam */
   def setInputCol(value: String): this.type = set(inputCol, value)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/35d781e7/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 8804dac..9bca7cc 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -627,6 +627,10 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
     >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])
     >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled")
     >>> model = standardScaler.fit(df)
+    >>> model.mean
+    DenseVector([1.0])
+    >>> model.std
+    DenseVector([1.4142])
     >>> model.transform(df).collect()[1].scaled
     DenseVector([1.4142])
     """
@@ -692,6 +696,20 @@ class StandardScalerModel(JavaModel):
     Model fitted by StandardScaler.
     """
 
+    @property
+    def std(self):
+        """
+        Standard deviation of the StandardScalerModel.
+        """
+        return self._call_java("std")
+
+    @property
+    def mean(self):
+        """
+        Mean of the StandardScalerModel.
+        """
+        return self._call_java("mean")
+
 
 @inherit_doc
 class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org