You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2019/04/12 14:31:36 UTC
[spark] branch master updated: [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
This is an automated email from the ASF dual-hosted git repository.
srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9ed60c2 [MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
9ed60c2 is described below
commit 9ed60c2c33737d4017ab8fb2628c40f8b14f3c5c
Author: Sean Owen <se...@databricks.com>
AuthorDate: Fri Apr 12 09:31:12 2019 -0500
[MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
## What changes were proposed in this pull request?
Loosen some tolerances in the ML regression-related tests, as they seem to account for some of the top slow tests in https://spark-tests.appspot.com/slow-tests
These changes are good for about a 25 second speedup on my laptop.
## How was this patch tested?
Existing tests
Closes #24351 from srowen/SpeedReg.
Authored-by: Sean Owen <se...@databricks.com>
Signed-off-by: Sean Owen <se...@databricks.com>
---
.../ml/classification/LogisticRegressionSuite.scala | 21 ++++++++++++++-------
.../GeneralizedLinearRegressionSuite.scala | 19 +++++++++----------
.../spark/ml/regression/LinearRegressionSuite.scala | 2 ++
3 files changed, 25 insertions(+), 17 deletions(-)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 9af7fff..334f92b 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -1140,8 +1140,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
test("binary logistic regression with intercept with ElasticNet regularization") {
val trainer1 = (new LogisticRegression).setFitIntercept(true).setMaxIter(120)
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(true).setWeightCol("weight")
+ .setTol(1e-5)
val trainer2 = (new LogisticRegression).setFitIntercept(true).setMaxIter(60)
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(false).setWeightCol("weight")
+ .setTol(1e-5)
val model1 = trainer1.fit(binaryDataset)
val model2 = trainer2.fit(binaryDataset)
@@ -1489,12 +1491,14 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
.setFitIntercept(true)
.setStandardization(true)
.setWeightCol("weight")
+ .setTol(1e-5)
val trainer2 = new LogisticRegression()
.setLowerBoundsOnCoefficients(lowerBoundsOnCoefficients)
.setLowerBoundsOnIntercepts(lowerBoundsOnIntercepts)
.setFitIntercept(true)
.setStandardization(false)
.setWeightCol("weight")
+ .setTol(1e-5)
val model1 = trainer1.fit(multinomialDataset)
val model2 = trainer2.fit(multinomialDataset)
@@ -1690,10 +1694,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
// use tighter constraints because OWL-QN solver takes longer to converge
val trainer1 = (new LogisticRegression).setFitIntercept(true)
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true)
- .setMaxIter(160).setTol(1e-10).setWeightCol("weight")
+ .setMaxIter(160).setTol(1e-5).setWeightCol("weight")
val trainer2 = (new LogisticRegression).setFitIntercept(true)
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false)
- .setMaxIter(110).setTol(1e-10).setWeightCol("weight")
+ .setMaxIter(110).setTol(1e-5).setWeightCol("weight")
val model1 = trainer1.fit(multinomialDataset)
val model2 = trainer2.fit(multinomialDataset)
@@ -1791,8 +1795,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
test("multinomial logistic regression without intercept with L1 regularization") {
val trainer1 = (new LogisticRegression).setFitIntercept(false)
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight")
+ .setTol(1e-5)
val trainer2 = (new LogisticRegression).setFitIntercept(false)
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false).setWeightCol("weight")
+ .setTol(1e-5)
val model1 = trainer1.fit(multinomialDataset)
val model2 = trainer2.fit(multinomialDataset)
@@ -2156,10 +2162,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
test("multinomial logistic regression with intercept with elasticnet regularization") {
val trainer1 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
- .setMaxIter(220).setTol(1e-10)
+ .setMaxIter(180).setTol(1e-5)
val trainer2 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
- .setMaxIter(220).setTol(1e-10)
+ .setMaxIter(150).setTol(1e-5)
val model1 = trainer1.fit(multinomialDataset)
val model2 = trainer2.fit(multinomialDataset)
@@ -2255,10 +2261,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
test("multinomial logistic regression without intercept with elasticnet regularization") {
val trainer1 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
- .setMaxIter(75).setTol(1e-10)
+ .setTol(1e-5)
val trainer2 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
- .setMaxIter(50).setTol(1e-10)
+ .setTol(1e-5)
val model1 = trainer1.fit(multinomialDataset)
val model2 = trainer2.fit(multinomialDataset)
@@ -2672,6 +2678,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
val trainer1 = new LogisticRegression()
.setRegParam(0.1)
.setElasticNetParam(1.0)
+ .setMaxIter(20)
// compressed row major is optimal
val model1 = trainer1.fit(multinomialDataset.limit(100))
@@ -2687,7 +2694,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
// coefficients are dense without L1 regularization
val trainer2 = new LogisticRegression()
- .setElasticNetParam(0.0)
+ .setElasticNetParam(0.0).setMaxIter(1)
val model3 = trainer2.fit(multinomialDataset.limit(100))
assert(model3.coefficientMatrix.isInstanceOf[DenseMatrix])
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index fc1284e..a30c472 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
import scala.util.Random
-import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.SparkConf
import org.apache.spark.ml.classification.LogisticRegressionSuite._
import org.apache.spark.ml.feature.{Instance, OffsetInstance}
import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
@@ -28,7 +28,6 @@ import org.apache.spark.ml.param.{ParamMap, ParamsSuite}
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.random._
-import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.functions._
@@ -269,7 +268,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
("inverse", datasetGaussianInverse))) {
for (fitIntercept <- Seq(false, true)) {
val trainer = new GeneralizedLinearRegression().setFamily("gaussian").setLink(link)
- .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+ .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
val model = trainer.fit(dataset)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -328,7 +327,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
for (fitIntercept <- Seq(false, true);
regParam <- Seq(0.0, 0.1, 1.0)) {
val trainer = new GeneralizedLinearRegression().setFamily("gaussian")
- .setFitIntercept(fitIntercept).setRegParam(regParam)
+ .setFitIntercept(fitIntercept).setRegParam(regParam).setTol(1e-3)
val model = trainer.fit(datasetGaussianIdentity)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -384,7 +383,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
("cloglog", datasetBinomial))) {
for (fitIntercept <- Seq(false, true)) {
val trainer = new GeneralizedLinearRegression().setFamily("binomial").setLink(link)
- .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+ .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
val model = trainer.fit(dataset)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1),
model.coefficients(2), model.coefficients(3))
@@ -457,7 +456,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
("sqrt", datasetPoissonSqrt))) {
for (fitIntercept <- Seq(false, true)) {
val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
- .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+ .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
val model = trainer.fit(dataset)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -515,7 +514,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
val dataset = datasetPoissonLogWithZero
for (fitIntercept <- Seq(false, true)) {
val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
- .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+ .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
val model = trainer.fit(dataset)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -573,7 +572,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
("identity", datasetGammaIdentity), ("log", datasetGammaLog))) {
for (fitIntercept <- Seq(false, true)) {
val trainer = new GeneralizedLinearRegression().setFamily("Gamma").setLink(link)
- .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
+ .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
val model = trainer.fit(dataset)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gamma family, " +
@@ -659,7 +658,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
variancePower <- Seq(1.6, 2.5)) {
val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
- .setVariancePower(variancePower).setLinkPower(linkPower)
+ .setVariancePower(variancePower).setLinkPower(linkPower).setTol(1e-4)
val model = trainer.fit(datasetTweedie)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +
@@ -736,7 +735,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
for (variancePower <- Seq(0.0, 1.0, 2.0, 1.5)) {
val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
- .setVariancePower(variancePower)
+ .setVariancePower(variancePower).setTol(1e-3)
val model = trainer.fit(datasetTweedie)
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index c4db336..d3df0e5 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -892,6 +892,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
.setRegParam(regParam)
.setElasticNetParam(elasticNetParam)
.setSolver(solver)
+ .setMaxIter(1)
MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals)
MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](
@@ -908,6 +909,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
.setFitIntercept(fitIntercept)
.setStandardization(standardization)
.setRegParam(regParam)
+ .setMaxIter(1)
MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
datasetWithOutlier.as[LabeledPoint], estimator, modelEquals)
MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org