You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2015/06/08 16:45:16 UTC
spark git commit: [SPARK-8140] [MLLIB] Remove empty model check in
StreamingLinearAlgorithm
Repository: spark
Updated Branches:
refs/heads/master a1d9e5cc6 -> e3e9c7038
[SPARK-8140] [MLLIB] Remove empty model check in StreamingLinearAlgorithm
1. Prevent creating a map of data to find numFeatures
2. If the model is empty, initialize it with a zero vector of size numFeatures
Author: MechCoder <ma...@gmail.com>
Closes #6684 from MechCoder/spark-8140 and squashes the following commits:
7fbf5f9 [MechCoder] [SPARK-8140] Remove empty model check in StreamingLinearAlgorithm and other minor cosmetic changes
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3e9c703
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3e9c703
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3e9c703
Branch: refs/heads/master
Commit: e3e9c70384028cc0c322ccea14f19d3b6d6b39eb
Parents: a1d9e5c
Author: MechCoder <ma...@gmail.com>
Authored: Mon Jun 8 15:45:12 2015 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Jun 8 15:45:12 2015 +0100
----------------------------------------------------------------------
.../org/apache/spark/mllib/optimization/GradientDescent.scala | 2 +-
.../spark/mllib/regression/GeneralizedLinearAlgorithm.scala | 6 +++---
.../spark/mllib/regression/StreamingLinearAlgorithm.scala | 3 ---
.../mllib/regression/StreamingLinearRegressionWithSGD.scala | 2 +-
4 files changed, 5 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e3e9c703/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index 4b7d058..06e45e1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -179,7 +179,7 @@ object GradientDescent extends Logging {
* if it's L2 updater; for L1 updater, the same logic is followed.
*/
var regVal = updater.compute(
- weights, Vectors.dense(new Array[Double](weights.size)), 0, 1, regParam)._2
+ weights, Vectors.zeros(weights.size), 0, 1, regParam)._2
for (i <- 1 to numIterations) {
val bcWeights = data.context.broadcast(weights)
http://git-wip-us.apache.org/repos/asf/spark/blob/e3e9c703/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 26be30f..6709bd7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -195,11 +195,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
*/
val initialWeights = {
if (numOfLinearPredictor == 1) {
- Vectors.dense(new Array[Double](numFeatures))
+ Vectors.zeros(numFeatures)
} else if (addIntercept) {
- Vectors.dense(new Array[Double]((numFeatures + 1) * numOfLinearPredictor))
+ Vectors.zeros((numFeatures + 1) * numOfLinearPredictor)
} else {
- Vectors.dense(new Array[Double](numFeatures * numOfLinearPredictor))
+ Vectors.zeros(numFeatures * numOfLinearPredictor)
}
}
run(input, initialWeights)
http://git-wip-us.apache.org/repos/asf/spark/blob/e3e9c703/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index cea8f3f..39308e5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -87,9 +87,6 @@ abstract class StreamingLinearAlgorithm[
model match {
case Some(m) =>
m.weights
- case None =>
- val numFeatures = rdd.first().features.size
- Vectors.dense(numFeatures)
}
model = Some(algorithm.run(rdd, initialWeights))
logInfo("Model updated at time %s".format(time.toString))
http://git-wip-us.apache.org/repos/asf/spark/blob/e3e9c703/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index a49153b..235e043 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -79,7 +79,7 @@ class StreamingLinearRegressionWithSGD private[mllib] (
this
}
- /** Set the initial weights. Default: [0.0, 0.0]. */
+ /** Set the initial weights. */
def setInitialWeights(initialWeights: Vector): this.type = {
this.model = Some(algorithm.createModel(initialWeights, 0.0))
this
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org