You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2014/11/12 07:47:57 UTC
spark git commit: [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.
Repository: spark
Updated Branches:
refs/heads/master daaca14c1 -> 2ef016b13
[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.
Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)
After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)
cc: mengxr, jkbradley
Author: Manish Amde <ma...@gmail.com>
Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:
8994552 [Manish Amde] reducing gbt test run times
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ef016b1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ef016b1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ef016b1
Branch: refs/heads/master
Commit: 2ef016b130a48869cf81fe6cf147ef2b1e79d674
Parents: daaca14
Author: Manish Amde <ma...@gmail.com>
Authored: Tue Nov 11 22:47:53 2014 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Nov 11 22:47:53 2014 -0800
----------------------------------------------------------------------
.../apache/spark/mllib/tree/GradientBoostingSuite.scala | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2ef016b1/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
index 99a02ed..ae0028a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala
@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: SquaredError") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)
- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: Absolute Error") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)
- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Binary classification with continuous features: Log Loss") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org