You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/03/14 20:46:57 UTC

spark git commit: [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD

Repository: spark
Updated Branches:
  refs/heads/master 23385e853 -> a48296f4f


[SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD

## What changes were proposed in this pull request?

`LinearRegressionWithSGD` and `StreamingLinearRegressionWithSGD` do not have `regParam` among their constructor arguments. They just depend on GradientDescent's default `regParam` value.
To be consistent with other algorithms, we should add this parameter to both constructors. The same default value is used.

## How was this patch tested?

Pass the existing unit test.

Author: Dongjoon Hyun <do...@apache.org>

Closes #11527 from dongjoon-hyun/SPARK-13686.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a48296f4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a48296f4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a48296f4

Branch: refs/heads/master
Commit: a48296f4fe513b63041f1a26231cfe152b69657f
Parents: 23385e8
Author: Dongjoon Hyun <do...@apache.org>
Authored: Mon Mar 14 12:46:53 2016 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Mar 14 12:46:53 2016 -0700

----------------------------------------------------------------------
 .../spark/mllib/regression/LinearRegression.scala   |  8 +++++---
 .../StreamingLinearRegressionWithSGD.scala          | 16 +++++++++++++---
 project/MimaExcludes.scala                          |  3 +++
 3 files changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 7da82c8..e754e74 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -89,6 +89,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
 class LinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
+    private var regParam: Double,
     private var miniBatchFraction: Double)
   extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable {
 
@@ -98,6 +99,7 @@ class LinearRegressionWithSGD private[mllib] (
   override val optimizer = new GradientDescent(gradient, updater)
     .setStepSize(stepSize)
     .setNumIterations(numIterations)
+    .setRegParam(regParam)
     .setMiniBatchFraction(miniBatchFraction)
 
   /**
@@ -105,7 +107,7 @@ class LinearRegressionWithSGD private[mllib] (
    * numIterations: 100, miniBatchFraction: 1.0}.
    */
   @Since("0.8.0")
-  def this() = this(1.0, 100, 1.0)
+  def this() = this(1.0, 100, 0.0, 1.0)
 
   override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
     new LinearRegressionModel(weights, intercept)
@@ -141,7 +143,7 @@ object LinearRegressionWithSGD {
       stepSize: Double,
       miniBatchFraction: Double,
       initialWeights: Vector): LinearRegressionModel = {
-    new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+    new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction)
       .run(input, initialWeights)
   }
 
@@ -163,7 +165,7 @@ object LinearRegressionWithSGD {
       numIterations: Int,
       stepSize: Double,
       miniBatchFraction: Double): LinearRegressionModel = {
-    new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input)
+    new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(input)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index fe2a46b..e8f4422 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -43,6 +43,7 @@ import org.apache.spark.mllib.linalg.Vector
 class StreamingLinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
+    private var regParam: Double,
     private var miniBatchFraction: Double)
   extends StreamingLinearAlgorithm[LinearRegressionModel, LinearRegressionWithSGD]
   with Serializable {
@@ -54,10 +55,10 @@ class StreamingLinearRegressionWithSGD private[mllib] (
    * (see `StreamingLinearAlgorithm`)
    */
   @Since("1.1.0")
-  def this() = this(0.1, 50, 1.0)
+  def this() = this(0.1, 50, 0.0, 1.0)
 
   @Since("1.1.0")
-  val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+  val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction)
 
   protected var model: Option[LinearRegressionModel] = None
 
@@ -71,8 +72,17 @@ class StreamingLinearRegressionWithSGD private[mllib] (
   }
 
   /**
-   * Set the number of iterations of gradient descent to run per update. Default: 50.
+   * Set the regularization parameter. Default: 0.0.
    */
+  @Since("2.0.0")
+  def setRegParam(regParam: Double): this.type = {
+    this.algorithm.optimizer.setRegParam(regParam)
+    this
+  }
+
+  /**
+    * Set the number of iterations of gradient descent to run per update. Default: 50.
+    */
   @Since("1.1.0")
   def setNumIterations(numIterations: Int): this.type = {
     this.algorithm.optimizer.setNumIterations(numIterations)

http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index faa52bf..a9973bc 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -318,6 +318,9 @@ object MimaExcludes {
         ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MultilabelMetrics.this"),
         ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions"),
         ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions")
+      ) ++ Seq(
+        // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD
+        ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
       )
     case v if v.startsWith("1.6") =>
       Seq(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org