You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/01/25 22:54:24 UTC

spark git commit: [SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark

Repository: spark
Updated Branches:
  refs/heads/master 9348431da -> dcae355c6


[SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark

`PCAModel` can now output `explainedVariance` on the Python side.

cc mengxr srowen

Author: Yanbo Liang <yb...@gmail.com>

Closes #10830 from yanboliang/spark-12905.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dcae355c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dcae355c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dcae355c

Branch: refs/heads/master
Commit: dcae355c64d7f6fdf61df2feefe464eb96c4cf5e
Parents: 9348431
Author: Yanbo Liang <yb...@gmail.com>
Authored: Mon Jan 25 13:54:21 2016 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Jan 25 13:54:21 2016 -0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/ml/feature/PCA.scala |  2 ++
 python/pyspark/ml/feature.py                             | 11 +++++++++++
 2 files changed, 13 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/dcae355c/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 7020397..0e07dfa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -102,6 +102,8 @@ object PCA extends DefaultParamsReadable[PCA] {
  * Model fitted by [[PCA]].
  *
  * @param pc A principal components Matrix. Each column is one principal component.
+ * @param explainedVariance A vector of proportions of variance explained by
+ *                          each principal component.
  */
 @Experimental
 class PCAModel private[ml] (

http://git-wip-us.apache.org/repos/asf/spark/blob/dcae355c/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 141ec34..1fa0eab 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1987,6 +1987,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
     >>> model = pca.fit(df)
     >>> model.transform(df).collect()[0].pca_features
     DenseVector([1.648..., -4.013...])
+    >>> model.explainedVariance
+    DenseVector([0.794..., 0.205...])
 
     .. versionadded:: 1.5.0
     """
@@ -2052,6 +2054,15 @@ class PCAModel(JavaModel):
         """
         return self._call_java("pc")
 
+    @property
+    @since("2.0.0")
+    def explainedVariance(self):
+        """
+        Returns a vector of proportions of variance
+        explained by each principal component.
+        """
+        return self._call_java("explainedVariance")
+
 
 @inherit_doc
 class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org