You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2019/04/12 14:31:36 UTC

[spark] branch master updated: [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance

This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ed60c2  [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
9ed60c2 is described below

commit 9ed60c2c33737d4017ab8fb2628c40f8b14f3c5c
Author: Sean Owen <se...@databricks.com>
AuthorDate: Fri Apr 12 09:31:12 2019 -0500

    [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
    
    ## What changes were proposed in this pull request?
    
    Loosen some tolerances in the ML regression-related tests, as they seem to account for some of the top slow tests in https://spark-tests.appspot.com/slow-tests
    
    These changes are good for about a 25 second speedup on my laptop.
    
    ## How was this patch tested?
    
    Existing tests
    
    Closes #24351 from srowen/SpeedReg.
    
    Authored-by: Sean Owen <se...@databricks.com>
    Signed-off-by: Sean Owen <se...@databricks.com>
---
 .../ml/classification/LogisticRegressionSuite.scala | 21 ++++++++++++++-------
 .../GeneralizedLinearRegressionSuite.scala          | 19 +++++++++----------
 .../spark/ml/regression/LinearRegressionSuite.scala |  2 ++
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 9af7fff..334f92b 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -1140,8 +1140,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
   test("binary logistic regression with intercept with ElasticNet regularization") {
     val trainer1 = (new LogisticRegression).setFitIntercept(true).setMaxIter(120)
       .setElasticNetParam(0.38).setRegParam(0.21).setStandardization(true).setWeightCol("weight")
+      .setTol(1e-5)
     val trainer2 = (new LogisticRegression).setFitIntercept(true).setMaxIter(60)
       .setElasticNetParam(0.38).setRegParam(0.21).setStandardization(false).setWeightCol("weight")
+      .setTol(1e-5)
 
     val model1 = trainer1.fit(binaryDataset)
     val model2 = trainer2.fit(binaryDataset)
@@ -1489,12 +1491,14 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
       .setFitIntercept(true)
       .setStandardization(true)
       .setWeightCol("weight")
+      .setTol(1e-5)
     val trainer2 = new LogisticRegression()
       .setLowerBoundsOnCoefficients(lowerBoundsOnCoefficients)
       .setLowerBoundsOnIntercepts(lowerBoundsOnIntercepts)
       .setFitIntercept(true)
       .setStandardization(false)
       .setWeightCol("weight")
+      .setTol(1e-5)
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -1690,10 +1694,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
     // use tighter constraints because OWL-QN solver takes longer to converge
     val trainer1 = (new LogisticRegression).setFitIntercept(true)
       .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true)
-      .setMaxIter(160).setTol(1e-10).setWeightCol("weight")
+      .setMaxIter(160).setTol(1e-5).setWeightCol("weight")
     val trainer2 = (new LogisticRegression).setFitIntercept(true)
       .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false)
-      .setMaxIter(110).setTol(1e-10).setWeightCol("weight")
+      .setMaxIter(110).setTol(1e-5).setWeightCol("weight")
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -1791,8 +1795,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
   test("multinomial logistic regression without intercept with L1 regularization") {
     val trainer1 = (new LogisticRegression).setFitIntercept(false)
       .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight")
+      .setTol(1e-5)
     val trainer2 = (new LogisticRegression).setFitIntercept(false)
       .setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false).setWeightCol("weight")
+      .setTol(1e-5)
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -2156,10 +2162,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
   test("multinomial logistic regression with intercept with elasticnet regularization") {
     val trainer1 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
       .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
-      .setMaxIter(220).setTol(1e-10)
+      .setMaxIter(180).setTol(1e-5)
     val trainer2 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
       .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
-      .setMaxIter(220).setTol(1e-10)
+      .setMaxIter(150).setTol(1e-5)
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -2255,10 +2261,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
   test("multinomial logistic regression without intercept with elasticnet regularization") {
     val trainer1 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
       .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
-      .setMaxIter(75).setTol(1e-10)
+      .setTol(1e-5)
     val trainer2 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
       .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
-      .setMaxIter(50).setTol(1e-10)
+      .setTol(1e-5)
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -2672,6 +2678,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
     val trainer1 = new LogisticRegression()
       .setRegParam(0.1)
       .setElasticNetParam(1.0)
+      .setMaxIter(20)
 
     // compressed row major is optimal
     val model1 = trainer1.fit(multinomialDataset.limit(100))
@@ -2687,7 +2694,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
 
     // coefficients are dense without L1 regularization
     val trainer2 = new LogisticRegression()
-      .setElasticNetParam(0.0)
+      .setElasticNetParam(0.0).setMaxIter(1)
     val model3 = trainer2.fit(multinomialDataset.limit(100))
     assert(model3.coefficientMatrix.isInstanceOf[DenseMatrix])
   }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index fc1284e..a30c472 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
 
 import scala.util.Random
 
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.SparkConf
 import org.apache.spark.ml.classification.LogisticRegressionSuite._
 import org.apache.spark.ml.feature.{Instance, OffsetInstance}
 import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
@@ -28,7 +28,6 @@ import org.apache.spark.ml.param.{ParamMap, ParamsSuite}
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
 import org.apache.spark.ml.util.TestingUtils._
 import org.apache.spark.mllib.random._
-import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.serializer.KryoSerializer
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.functions._
@@ -269,7 +268,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
       ("inverse", datasetGaussianInverse))) {
       for (fitIntercept <- Seq(false, true)) {
         val trainer = new GeneralizedLinearRegression().setFamily("gaussian").setLink(link)
-          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
         val model = trainer.fit(dataset)
         val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
         assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -328,7 +327,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
     for (fitIntercept <- Seq(false, true);
          regParam <- Seq(0.0, 0.1, 1.0)) {
       val trainer = new GeneralizedLinearRegression().setFamily("gaussian")
-        .setFitIntercept(fitIntercept).setRegParam(regParam)
+        .setFitIntercept(fitIntercept).setRegParam(regParam).setTol(1e-3)
       val model = trainer.fit(datasetGaussianIdentity)
       val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
       assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -384,7 +383,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
       ("cloglog", datasetBinomial))) {
       for (fitIntercept <- Seq(false, true)) {
         val trainer = new GeneralizedLinearRegression().setFamily("binomial").setLink(link)
-          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
         val model = trainer.fit(dataset)
         val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1),
           model.coefficients(2), model.coefficients(3))
@@ -457,7 +456,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
       ("sqrt", datasetPoissonSqrt))) {
       for (fitIntercept <- Seq(false, true)) {
         val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
-          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
         val model = trainer.fit(dataset)
         val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
         assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -515,7 +514,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
     val dataset = datasetPoissonLogWithZero
     for (fitIntercept <- Seq(false, true)) {
       val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
-        .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+        .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
       val model = trainer.fit(dataset)
       val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
       assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -573,7 +572,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
       ("identity", datasetGammaIdentity), ("log", datasetGammaLog))) {
       for (fitIntercept <- Seq(false, true)) {
         val trainer = new GeneralizedLinearRegression().setFamily("Gamma").setLink(link)
-          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+          .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
         val model = trainer.fit(dataset)
         val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
         assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gamma family, " +
@@ -659,7 +658,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
          variancePower <- Seq(1.6, 2.5)) {
       val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
         .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
-        .setVariancePower(variancePower).setLinkPower(linkPower)
+        .setVariancePower(variancePower).setLinkPower(linkPower).setTol(1e-4)
       val model = trainer.fit(datasetTweedie)
       val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
       assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +
@@ -736,7 +735,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
       for (variancePower <- Seq(0.0, 1.0, 2.0, 1.5)) {
         val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
           .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
-          .setVariancePower(variancePower)
+          .setVariancePower(variancePower).setTol(1e-3)
         val model = trainer.fit(datasetTweedie)
         val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
         assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index c4db336..d3df0e5 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -892,6 +892,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
         .setRegParam(regParam)
         .setElasticNetParam(elasticNetParam)
         .setSolver(solver)
+        .setMaxIter(1)
       MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
         datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals)
       MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](
@@ -908,6 +909,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
         .setFitIntercept(fitIntercept)
         .setStandardization(standardization)
         .setRegParam(regParam)
+        .setMaxIter(1)
       MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
         datasetWithOutlier.as[LabeledPoint], estimator, modelEquals)
       MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org