You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2015/12/21 11:21:15 UTC
spark git commit: [SPARK-12349][ML] Make spark.ml PCAModel load
backwards compatible
Repository: spark
Updated Branches:
refs/heads/master ce1798b3a -> d0f695089
[SPARK-12349][ML] Make spark.ml PCAModel load backwards compatible
Only load explainedVariance in PCAModel if the model was written by a Spark version later than 1.6.x.
jkbradley, is this roughly what you had in mind?
Author: Sean Owen <so...@cloudera.com>
Closes #10327 from srowen/SPARK-12349.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d0f69508
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d0f69508
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d0f69508
Branch: refs/heads/master
Commit: d0f695089e4627273133c5f49ef7a83c1840c8f5
Parents: ce1798b
Author: Sean Owen <so...@cloudera.com>
Authored: Mon Dec 21 10:21:22 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Dec 21 10:21:22 2015 +0000
----------------------------------------------------------------------
.../scala/org/apache/spark/ml/feature/PCA.scala | 33 +++++++++++++++++---
1 file changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d0f69508/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 53d33ea..759be81 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -167,14 +167,37 @@ object PCAModel extends MLReadable[PCAModel] {
private val className = classOf[PCAModel].getName
+ /**
+ * Loads a [[PCAModel]] from data located at the input path. Note that the model includes an
+ * `explainedVariance` member that is not recorded by Spark 1.6 and earlier. A model
+ * can be loaded from such older data but will have an empty vector for
+ * `explainedVariance`.
+ *
+ * @param path path to serialized model data
+ * @return a [[PCAModel]]
+ */
override def load(path: String): PCAModel = {
val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
+
+ // explainedVariance field is not present in Spark <= 1.6; unparseable versions count as old
+ val versionRegex = "([0-9]+)\\.([0-9]+).*".r
+ val hasExplainedVariance = metadata.sparkVersion match {
+ case versionRegex(major, minor) =>
+ (major.toInt >= 2 || (major.toInt == 1 && minor.toInt > 6))
+ case _ => false
+ }
+
val dataPath = new Path(path, "data").toString
- val Row(pc: DenseMatrix, explainedVariance: DenseVector) =
- sqlContext.read.parquet(dataPath)
- .select("pc", "explainedVariance")
- .head()
- val model = new PCAModel(metadata.uid, pc, explainedVariance)
+ val model = if (hasExplainedVariance) {
+ val Row(pc: DenseMatrix, explainedVariance: DenseVector) =
+ sqlContext.read.parquet(dataPath)
+ .select("pc", "explainedVariance")
+ .head()
+ new PCAModel(metadata.uid, pc, explainedVariance)
+ } else {
+ val Row(pc: DenseMatrix) = sqlContext.read.parquet(dataPath).select("pc").head()
+ new PCAModel(metadata.uid, pc, Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector])
+ }
DefaultParamsReader.getAndSetParams(model, metadata)
model
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org