You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/01/25 22:54:24 UTC
spark git commit: [SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark
Repository: spark
Updated Branches:
refs/heads/master 9348431da -> dcae355c6
[SPARK-12905][ML][PYSPARK] PCAModel return eigenvalues for PySpark
`PCAModel` can now output `explainedVariance` on the Python side.
cc mengxr srowen
Author: Yanbo Liang <yb...@gmail.com>
Closes #10830 from yanboliang/spark-12905.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dcae355c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dcae355c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dcae355c
Branch: refs/heads/master
Commit: dcae355c64d7f6fdf61df2feefe464eb96c4cf5e
Parents: 9348431
Author: Yanbo Liang <yb...@gmail.com>
Authored: Mon Jan 25 13:54:21 2016 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Jan 25 13:54:21 2016 -0800
----------------------------------------------------------------------
.../src/main/scala/org/apache/spark/ml/feature/PCA.scala | 2 ++
python/pyspark/ml/feature.py | 11 +++++++++++
2 files changed, 13 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/dcae355c/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 7020397..0e07dfa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -102,6 +102,8 @@ object PCA extends DefaultParamsReadable[PCA] {
* Model fitted by [[PCA]].
*
* @param pc A principal components Matrix. Each column is one principal component.
+ * @param explainedVariance A vector of proportions of variance explained by
+ * each principal component.
*/
@Experimental
class PCAModel private[ml] (
http://git-wip-us.apache.org/repos/asf/spark/blob/dcae355c/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 141ec34..1fa0eab 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1987,6 +1987,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
>>> model = pca.fit(df)
>>> model.transform(df).collect()[0].pca_features
DenseVector([1.648..., -4.013...])
+ >>> model.explainedVariance
+ DenseVector([0.794..., 0.205...])
.. versionadded:: 1.5.0
"""
@@ -2052,6 +2054,15 @@ class PCAModel(JavaModel):
"""
return self._call_java("pc")
+ @property
+ @since("2.0.0")
+ def explainedVariance(self):
+ """
+ Returns a vector of proportions of variance
+ explained by each principal component.
+ """
+ return self._call_java("explainedVariance")
+
@inherit_doc
class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org