Posted to commits@spark.apache.org by do...@apache.org on 2020/01/30 17:05:51 UTC

[spark] branch master updated: [SPARK-30678][MLLIB][TESTS] Eliminate warnings from deprecated BisectingKMeansModel.computeCost

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a291433  [SPARK-30678][MLLIB][TESTS] Eliminate warnings from deprecated BisectingKMeansModel.computeCost
a291433 is described below

commit a291433ed316932618583544ee6d0f1b2f829b80
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Thu Jan 30 09:05:14 2020 -0800

    [SPARK-30678][MLLIB][TESTS] Eliminate warnings from deprecated BisectingKMeansModel.computeCost
    
    ### What changes were proposed in this pull request?
    In this PR, I propose to replace the deprecated method `computeCost` of `BisectingKMeansModel` with `summary.trainingCost`.
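
    As a hedged illustration only (not part of this patch), here is a minimal, self-contained sketch of the replaced call and its substitute, assuming a local SparkSession and the Spark 3.0 ML APIs; the object name and toy data are invented for the example:
    ```
    import org.apache.spark.ml.clustering.BisectingKMeans
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    object TrainingCostSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().master("local[*]").appName("trainingCost").getOrCreate()
        import spark.implicits._

        // Tiny two-cluster dataset with the default "features" column (hypothetical example data).
        val dataset = Seq(
          Vectors.dense(0.0, 0.0), Vectors.dense(0.1, 0.1),
          Vectors.dense(9.0, 9.0), Vectors.dense(9.1, 9.1)
        ).map(Tuple1.apply).toDF("features")

        val model = new BisectingKMeans().setK(2).setSeed(1).fit(dataset)

        // Deprecated since 3.0.0; this is the call the tests used before the patch:
        // val cost = model.computeCost(dataset)

        // Replacement used by the patch: the cost on the training dataset
        // is exposed on the model summary.
        val cost = model.summary.trainingCost
        println(s"training cost = $cost")

        spark.stop()
      }
    }
    ```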
    
    ### Why are the changes needed?
    The changes eliminate the following deprecation warnings (the `ClusteringEvaluator` alternative they point to is sketched below):
    ```
    BisectingKMeansSuite.scala:108: method computeCost in class BisectingKMeansModel is deprecated (since 3.0.0): This method is deprecated and will be removed in future versions. Use ClusteringEvaluator instead. You can also get the cost on the training dataset in the summary.
    [warn]     assert(model.computeCost(dataset) < 0.1)
    BisectingKMeansSuite.scala:135: method computeCost in class BisectingKMeansModel is deprecated (since 3.0.0): This method is deprecated and will be removed in future versions. Use ClusteringEvaluator instead. You can also get the cost on the training dataset in the summary.
    [warn]     assert(model.computeCost(dataset) == summary.trainingCost)
    BisectingKMeansSuite.scala:323: method computeCost in class BisectingKMeansModel is deprecated (since 3.0.0): This method is deprecated and will be removed in future versions. Use ClusteringEvaluator instead. You can also get the cost on the training dataset in the summary.
    [warn]       model.computeCost(dataset)
    ```
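
    The deprecation message recommends `ClusteringEvaluator` as the general replacement. As a hedged sketch only (not part of this patch), and continuing with the `model` and `dataset` from the sketch above, the evaluator scores a clustering by silhouette on the predictions rather than returning the training cost:
    ```
    import org.apache.spark.ml.evaluation.ClusteringEvaluator

    // Assign cluster ids, then score the clustering by silhouette.
    val predictions = model.transform(dataset)
    val silhouette = new ClusteringEvaluator()
      .setFeaturesCol("features")
      .setPredictionCol("prediction")
      .evaluate(predictions)
    println(s"silhouette = $silhouette")
    ```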
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    By running `BisectingKMeansSuite` via:
    ```
    ./build/sbt "test:testOnly *BisectingKMeansSuite"
    ```
    
    Closes #27401 from MaxGekk/kmeans-computeCost-warning.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
index fc756d4..debd0dd 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
@@ -105,7 +105,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest {
     val bkm = new BisectingKMeans().setK(k).setPredictionCol(predictionColName).setSeed(1)
     val model = bkm.fit(dataset)
     assert(model.clusterCenters.length === k)
-    assert(model.computeCost(dataset) < 0.1)
+    assert(model.summary.trainingCost < 0.1)
     assert(model.hasParent)
 
     testTransformerByGlobalCheckFunc[Tuple1[Vector]](dataset.toDF(), model,
@@ -132,7 +132,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest {
     assert(clusterSizes.forall(_ >= 0))
     assert(summary.numIter == 20)
     assert(summary.trainingCost < 0.1)
-    assert(model.computeCost(dataset) == summary.trainingCost)
+    assert(model.summary.trainingCost == summary.trainingCost)
 
     model.setSummary(None)
     assert(!model.hasSummary)
@@ -320,7 +320,7 @@ class BisectingKMeansSuite extends MLTest with DefaultReadWriteTest {
   test("BisectingKMeans with Array input") {
     def trainAndComputeCost(dataset: DataFrame): Double = {
       val model = new BisectingKMeans().setK(k).setMaxIter(1).setSeed(1).fit(dataset)
-      model.computeCost(dataset)
+      model.summary.trainingCost
     }
 
     val (newDataset, newDatasetD, newDatasetF) = MLTestingUtils.generateArrayFeatureDataset(dataset)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org