Posted to commits@spark.apache.org by me...@apache.org on 2014/11/12 07:47:57 UTC

spark git commit: [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Repository: spark
Updated Branches:
  refs/heads/master daaca14c1 -> 2ef016b13


[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)
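The speedup comes from shrinking the synthetic dataset each test generates (50 features x 1000
points down to 10 features x 100 points) and loosening the regression validation tolerance from
0.02 to 0.03, as the diff below shows. As a rough sketch only (the training call and surrounding
assertions are elided here; they are unchanged by this commit), the affected setup in each test
now reads approximately:

    // Smaller synthetic dataset: 10 features, 100 points (previously 50 x 1000).
    val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
    val rdd = sc.parallelize(arr)
    val categoricalFeaturesInfo = Map.empty[Int, Int]
    // ... train gbt and the reference decision tree as before ...
    // Tolerance relaxed from 0.02, presumably to accommodate the smaller sample.
    EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)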

cc: mengxr, jkbradley

Author: Manish Amde <ma...@gmail.com>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ef016b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ef016b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ef016b1

Branch: refs/heads/master
Commit: 2ef016b130a48869cf81fe6cf147ef2b1e79d674
Parents: daaca14
Author: Manish Amde <ma...@gmail.com>
Authored: Tue Nov 11 22:47:53 2014 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Nov 11 22:47:53 2014 -0800

----------------------------------------------------------------------
 .../apache/spark/mllib/tree/GradientBoostingSuite.scala   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2ef016b1/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
index 99a02ed..ae0028a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
   test("Regression with continuous features: SquaredError") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 
@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
         assert(gbt.weakHypotheses.size === numIterations)
         val gbtTree = gbt.weakHypotheses(0)
 
-        EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+        EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
 
         // Make sure trees are the same.
         assert(gbtTree.toString == dt.toString)
@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
   test("Regression with continuous features: Absolute Error") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 
@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
         assert(gbt.weakHypotheses.size === numIterations)
         val gbtTree = gbt.weakHypotheses(0)
 
-        EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+        EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
 
         // Make sure trees are the same.
         assert(gbtTree.toString == dt.toString)
@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
   test("Binary classification with continuous features: Log Loss") {
     GradientBoostingSuite.testCombinations.foreach {
       case (numIterations, learningRate, subsamplingRate) =>
-        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+        val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
         val rdd = sc.parallelize(arr)
         val categoricalFeaturesInfo = Map.empty[Int, Int]
 

