You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/03/14 20:46:57 UTC
spark git commit: [SPARK-13686][MLLIB][STREAMING] Add a constructor
parameter `regParam` to (Streaming)LinearRegressionWithSGD
Repository: spark
Updated Branches:
refs/heads/master 23385e853 -> a48296f4f
[SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `regParam` to (Streaming)LinearRegressionWithSGD
## What changes were proposed in this pull request?
`LinearRegressionWithSGD` and `StreamingLinearRegressionWithSGD` do not have `regParam` as a constructor argument. They just depend on GradientDescent's default `regParam` value.
To be consistent with other algorithms, we had better add them. The same default value is used.
## How was this patch tested?
Pass the existing unit test.
Author: Dongjoon Hyun <do...@apache.org>
Closes #11527 from dongjoon-hyun/SPARK-13686.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a48296f4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a48296f4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a48296f4
Branch: refs/heads/master
Commit: a48296f4fe513b63041f1a26231cfe152b69657f
Parents: 23385e8
Author: Dongjoon Hyun <do...@apache.org>
Authored: Mon Mar 14 12:46:53 2016 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Mon Mar 14 12:46:53 2016 -0700
----------------------------------------------------------------------
.../spark/mllib/regression/LinearRegression.scala | 8 +++++---
.../StreamingLinearRegressionWithSGD.scala | 16 +++++++++++++---
project/MimaExcludes.scala | 3 +++
3 files changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 7da82c8..e754e74 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -89,6 +89,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
class LinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
+ private var regParam: Double,
private var miniBatchFraction: Double)
extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable {
@@ -98,6 +99,7 @@ class LinearRegressionWithSGD private[mllib] (
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
+ .setRegParam(regParam)
.setMiniBatchFraction(miniBatchFraction)
/**
@@ -105,7 +107,7 @@ class LinearRegressionWithSGD private[mllib] (
* numIterations: 100, miniBatchFraction: 1.0}.
*/
@Since("0.8.0")
- def this() = this(1.0, 100, 1.0)
+ def this() = this(1.0, 100, 0.0, 1.0)
override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
new LinearRegressionModel(weights, intercept)
@@ -141,7 +143,7 @@ object LinearRegressionWithSGD {
stepSize: Double,
miniBatchFraction: Double,
initialWeights: Vector): LinearRegressionModel = {
- new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+ new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction)
.run(input, initialWeights)
}
@@ -163,7 +165,7 @@ object LinearRegressionWithSGD {
numIterations: Int,
stepSize: Double,
miniBatchFraction: Double): LinearRegressionModel = {
- new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction).run(input)
+ new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(input)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index fe2a46b..e8f4422 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -43,6 +43,7 @@ import org.apache.spark.mllib.linalg.Vector
class StreamingLinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
+ private var regParam: Double,
private var miniBatchFraction: Double)
extends StreamingLinearAlgorithm[LinearRegressionModel, LinearRegressionWithSGD]
with Serializable {
@@ -54,10 +55,10 @@ class StreamingLinearRegressionWithSGD private[mllib] (
* (see `StreamingLinearAlgorithm`)
*/
@Since("1.1.0")
- def this() = this(0.1, 50, 1.0)
+ def this() = this(0.1, 50, 0.0, 1.0)
@Since("1.1.0")
- val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
+ val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction)
protected var model: Option[LinearRegressionModel] = None
@@ -71,8 +72,17 @@ class StreamingLinearRegressionWithSGD private[mllib] (
}
/**
- * Set the number of iterations of gradient descent to run per update. Default: 50.
+ * Set the regularization parameter. Default: 0.0.
*/
+ @Since("2.0.0")
+ def setRegParam(regParam: Double): this.type = {
+ this.algorithm.optimizer.setRegParam(regParam)
+ this
+ }
+
+ /**
+ * Set the number of iterations of gradient descent to run per update. Default: 50.
+ */
@Since("1.1.0")
def setNumIterations(numIterations: Int): this.type = {
this.algorithm.optimizer.setNumIterations(numIterations)
http://git-wip-us.apache.org/repos/asf/spark/blob/a48296f4/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index faa52bf..a9973bc 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -318,6 +318,9 @@ object MimaExcludes {
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.mllib.evaluation.MultilabelMetrics.this"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions"),
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictions")
+ ) ++ Seq(
+ // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD
+ ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
)
case v if v.startsWith("1.6") =>
Seq(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org