You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2018/07/16 03:14:23 UTC
spark git commit: [TRIVIAL][ML] GMM unpersist RDD after training

Repository: spark
Updated Branches:
  refs/heads/master bbc2ffc8a -> bcf7121ed


[TRIVIAL][ML] GMM unpersist RDD after training

## What changes were proposed in this pull request?
Unpersist the cached `instances` RDD once training has finished.

## How was this patch tested?
Covered by the existing tests; no new tests were added.

Author: 郑瑞峰 <zh...@ZBMAC-C02VX5XWH.local>

Closes #21562 from zhengruifeng/gmm_unpersist.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bcf7121e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bcf7121e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bcf7121e

Branch: refs/heads/master
Commit: bcf7121ed2283d88424863ac1d35393870eaae6b
Parents: bbc2ffc
Author: 郑瑞峰 <zh...@ZBMAC-C02VX5XWH.local>
Authored: Sun Jul 15 20:14:17 2018 -0700
Committer: Felix Cheung <fe...@apache.org>
Committed: Sun Jul 15 20:14:17 2018 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/ml/clustering/GaussianMixture.scala    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/bcf7121e/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index dae64ba..f0707b3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -341,7 +341,7 @@ class GaussianMixture @Since("2.0.0") (
     val sc = dataset.sparkSession.sparkContext
     val numClusters = $(k)
 
-    val instances: RDD[Vector] = dataset
+    val instances = dataset
       .select(DatasetUtils.columnToVector(dataset, getFeaturesCol)).rdd.map {
       case Row(features: Vector) => features
     }.cache()
@@ -416,6 +416,7 @@ class GaussianMixture @Since("2.0.0") (
       iter += 1
     }
 
+    instances.unpersist(false)
     val gaussianDists = gaussians.map { case (mean, covVec) =>
       val cov = GaussianMixture.unpackUpperTriangularMatrix(numFeatures, covVec.values)
       new MultivariateGaussian(mean, cov)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org