You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/05/01 01:26:53 UTC
spark git commit: [SPARK-7279] Removed diffSum, which is theoretically
zero, in LinearRegression, and fixed code formatting
Repository: spark
Updated Branches:
refs/heads/master fa01bec48 -> 1c3e402e6
[SPARK-7279] Removed diffSum, which is theoretically zero, in LinearRegression, and fixed code formatting
Author: DB Tsai <db...@netflix.com>
Closes #5809 from dbtsai/format and squashes the following commits:
6904eed [DB Tsai] trigger Jenkins
9146e19 [DB Tsai] initial commit
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c3e402e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c3e402e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c3e402e
Branch: refs/heads/master
Commit: 1c3e402e669d047410b00de9193adf3c329844a2
Parents: fa01bec
Author: DB Tsai <db...@netflix.com>
Authored: Thu Apr 30 16:26:51 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Apr 30 16:26:51 2015 -0700
----------------------------------------------------------------------
.../spark/ml/regression/LinearRegression.scala | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/1c3e402e/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 11c6cea..0b81c48 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -25,8 +25,7 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction}
import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.param.{Params, ParamMap}
-import org.apache.spark.ml.param.shared.{HasTol, HasElasticNetParam, HasMaxIter,
- HasRegParam}
+import org.apache.spark.ml.param.shared.{HasTol, HasElasticNetParam, HasMaxIter, HasRegParam}
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.linalg.BLAS._
@@ -103,9 +102,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
case LabeledPoint(label: Double, features: Vector) => (label, features)
}
val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
- if (handlePersistence) {
- instances.persist(StorageLevel.MEMORY_AND_DISK)
- }
+ if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK)
val (summarizer, statCounter) = instances.treeAggregate(
(new MultivariateOnlineSummarizer, new StatCounter))( {
@@ -146,8 +143,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress
val optimizer = if (paramMap(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) {
new BreezeLBFGS[BDV[Double]](paramMap(maxIter), 10, paramMap(tol))
} else {
- new BreezeOWLQN[Int, BDV[Double]](paramMap(maxIter), 10, effectiveL1RegParam,
- paramMap(tol))
+ new BreezeOWLQN[Int, BDV[Double]](paramMap(maxIter), 10, effectiveL1RegParam, paramMap(tol))
}
val initialWeights = Vectors.zeros(numFeatures)
@@ -304,9 +300,8 @@ private class LeastSquaresAggregator(
featuresStd: Array[Double],
featuresMean: Array[Double]) extends Serializable {
- private var totalCnt: Long = 0
+ private var totalCnt: Long = 0L
private var lossSum = 0.0
- private var diffSum = 0.0
private val (effectiveWeightsArray: Array[Double], offset: Double, dim: Int) = {
val weightsArray = weights.toArray.clone()
@@ -323,9 +318,10 @@ private class LeastSquaresAggregator(
}
(weightsArray, -sum + labelMean / labelStd, weightsArray.length)
}
+
private val effectiveWeightsVector = Vectors.dense(effectiveWeightsArray)
- private val gradientSumArray: Array[Double] = Array.ofDim[Double](dim)
+ private val gradientSumArray = Array.ofDim[Double](dim)
/**
* Add a new training data to this LeastSquaresAggregator, and update the loss and gradient
@@ -350,7 +346,6 @@ private class LeastSquaresAggregator(
}
}
lossSum += diff * diff / 2.0
- diffSum += diff
}
totalCnt += 1
@@ -372,7 +367,6 @@ private class LeastSquaresAggregator(
if (other.totalCnt != 0) {
totalCnt += other.totalCnt
lossSum += other.lossSum
- diffSum += other.diffSum
var i = 0
val localThisGradientSumArray = this.gradientSumArray
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org