You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2018/07/16 03:14:23 UTC
spark git commit: [TRIVIAL][ML] GMM unpersist RDD after training
Repository: spark
Updated Branches:
refs/heads/master bbc2ffc8a -> bcf7121ed
[TRIVIAL][ML] GMM unpersist RDD after training
## What changes were proposed in this pull request?
Unpersist the cached `instances` RDD once training has finished, so it no longer stays cached after the training loop.
## How was this patch tested?
Covered by existing tests.
Author: 郑瑞峰 <zh...@ZBMAC-C02VX5XWH.local>
Closes #21562 from zhengruifeng/gmm_unpersist.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcf7121e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcf7121e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcf7121e
Branch: refs/heads/master
Commit: bcf7121ed2283d88424863ac1d35393870eaae6b
Parents: bbc2ffc
Author: 郑瑞峰 <zh...@ZBMAC-C02VX5XWH.local>
Authored: Sun Jul 15 20:14:17 2018 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Sun Jul 15 20:14:17 2018 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bcf7121e/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index dae64ba..f0707b3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -341,7 +341,7 @@ class GaussianMixture @Since("2.0.0") (
val sc = dataset.sparkSession.sparkContext
val numClusters = $(k)
- val instances: RDD[Vector] = dataset
+ val instances = dataset
.select(DatasetUtils.columnToVector(dataset, getFeaturesCol)).rdd.map {
case Row(features: Vector) => features
}.cache()
@@ -416,6 +416,7 @@ class GaussianMixture @Since("2.0.0") (
iter += 1
}
+ instances.unpersist(false)
val gaussianDists = gaussians.map { case (mean, covVec) =>
val cov = GaussianMixture.unpackUpperTriangularMatrix(numFeatures, covVec.values)
new MultivariateGaussian(mean, cov)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org