You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2014/01/16 05:15:45 UTC
[1/6] git commit: Code clean up for mllib
Updated Branches:
refs/heads/master 0675ca50f -> 84595ea3e
Code clean up for mllib
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0d94d74e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0d94d74e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0d94d74e
Branch: refs/heads/master
Commit: 0d94d74edf759e19c3f4ca98eadf6b22536c6645
Parents: 01c0d72
Author: Frank Dai <so...@gmail.com>
Authored: Tue Jan 14 14:37:26 2014 +0800
Committer: Frank Dai <so...@gmail.com>
Committed: Tue Jan 14 14:37:26 2014 +0800
----------------------------------------------------------------------
.../spark/mllib/api/python/PythonMLLibAPI.scala | 24 +++++++++-----------
.../apache/spark/mllib/classification/SVM.scala | 2 --
.../spark/mllib/clustering/KMeansModel.scala | 5 ++--
.../mllib/regression/LinearRegression.scala | 2 +-
.../mllib/regression/RidgeRegression.scala | 8 +++----
.../spark/mllib/util/LinearDataGenerator.scala | 4 +---
.../spark/mllib/util/MFDataGenerator.scala | 17 +++++++-------
.../org/apache/spark/mllib/util/MLUtils.scala | 2 +-
.../spark/mllib/util/SVMDataGenerator.scala | 2 +-
.../LogisticRegressionSuite.scala | 6 ++---
.../spark/mllib/classification/SVMSuite.scala | 9 ++++----
.../spark/mllib/clustering/KMeansSuite.scala | 3 ---
.../spark/mllib/recommendation/ALSSuite.scala | 1 -
.../spark/mllib/regression/LassoSuite.scala | 10 ++++----
.../regression/LinearRegressionSuite.scala | 9 ++++----
.../mllib/regression/RidgeRegressionSuite.scala | 3 ---
16 files changed, 44 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c972a71..9ec6019 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -24,7 +24,6 @@ import org.apache.spark.mllib.recommendation._
import org.apache.spark.rdd.RDD
import java.nio.ByteBuffer
import java.nio.ByteOrder
-import java.nio.DoubleBuffer
/**
* The Java stubs necessary for the Python mllib bindings.
@@ -37,11 +36,11 @@ class PythonMLLibAPI extends Serializable {
}
val bb = ByteBuffer.wrap(bytes)
bb.order(ByteOrder.nativeOrder())
- val magic = bb.getLong()
+ val magic = bb.getLong
if (magic != 1) {
throw new IllegalArgumentException("Magic " + magic + " is wrong.")
}
- val length = bb.getLong()
+ val length = bb.getLong
if (packetLength != 16 + 8 * length) {
throw new IllegalArgumentException("Length " + length + " is wrong.")
}
@@ -70,18 +69,17 @@ class PythonMLLibAPI extends Serializable {
}
val bb = ByteBuffer.wrap(bytes)
bb.order(ByteOrder.nativeOrder())
- val magic = bb.getLong()
+ val magic = bb.getLong
if (magic != 2) {
throw new IllegalArgumentException("Magic " + magic + " is wrong.")
}
- val rows = bb.getLong()
- val cols = bb.getLong()
+ val rows = bb.getLong
+ val cols = bb.getLong
if (packetLength != 24 + 8 * rows * cols) {
throw new IllegalArgumentException("Size " + rows + "x" + cols + " is wrong.")
}
val db = bb.asDoubleBuffer()
val ans = new Array[Array[Double]](rows.toInt)
- var i = 0
for (i <- 0 until rows.toInt) {
ans(i) = new Array[Double](cols.toInt)
db.get(ans(i))
@@ -200,9 +198,9 @@ class PythonMLLibAPI extends Serializable {
private def unpackRating(ratingBytes: Array[Byte]): Rating = {
val bb = ByteBuffer.wrap(ratingBytes)
bb.order(ByteOrder.nativeOrder())
- val user = bb.getInt()
- val product = bb.getInt()
- val rating = bb.getDouble()
+ val user = bb.getInt
+ val product = bb.getInt
+ val rating = bb.getDouble
new Rating(user, product, rating)
}
@@ -210,8 +208,8 @@ class PythonMLLibAPI extends Serializable {
private[spark] def unpackTuple(tupleBytes: Array[Byte]): (Int, Int) = {
val bb = ByteBuffer.wrap(tupleBytes)
bb.order(ByteOrder.nativeOrder())
- val v1 = bb.getInt()
- val v2 = bb.getInt()
+ val v1 = bb.getInt
+ val v2 = bb.getInt
(v1, v2)
}
@@ -219,7 +217,7 @@ class PythonMLLibAPI extends Serializable {
* Serialize a Rating object into an array of bytes.
* It can be deserialized using RatingDeserializer().
*
- * @param rate
+ * @param rate the Rating object to serialize
* @return
*/
private[spark] def serializeRating(rate: Rating): Array[Byte] = {
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3b8f855..831aa76 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -17,8 +17,6 @@
package org.apache.spark.mllib.classification
-import scala.math.signum
-
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.optimization._
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index cfc81c9..f770707 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.clustering
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
-import org.apache.spark.mllib.util.MLUtils
-
/**
* A clustering model for K-means. Each point belongs to the cluster with the closest center.
@@ -39,6 +37,7 @@ class KMeansModel(val clusterCenters: Array[Array[Double]]) extends Serializable
* model on the given data.
*/
def computeCost(data: RDD[Array[Double]]): Double = {
- data.map(p => KMeans.pointCost(clusterCenters, p)).sum
+ data.map(p => KMeans.pointCost(clusterCenters, p)).sum()
+
}
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 597d55e..6aa63b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.regression
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.optimization._
import org.apache.spark.mllib.util.MLUtils
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index b29508d..41b80cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.regression
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.optimization._
import org.apache.spark.mllib.util.MLUtils
@@ -76,7 +76,7 @@ class RidgeRegressionWithSGD private (
def createModel(weights: Array[Double], intercept: Double) = {
val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*)
val weightsScaled = weightsMat.div(xColSd)
- val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0))
+ val interceptScaled = yMean - weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0)
new RidgeRegressionModel(weightsScaled.data, interceptScaled)
}
@@ -86,7 +86,7 @@ class RidgeRegressionWithSGD private (
initialWeights: Array[Double])
: RidgeRegressionModel =
{
- val nfeatures: Int = input.first.features.length
+ val nfeatures: Int = input.first().features.length
val nexamples: Long = input.count()
// To avoid penalizing the intercept, we center and scale the data.
@@ -122,7 +122,7 @@ object RidgeRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
- * @param initialWeights Initial set of weights to be used. Array should be equal in size to
+ * @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*/
def train(
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index bc5045f..2e03684 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -25,7 +25,6 @@ import org.jblas.DoubleMatrix
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.regression.LabeledPoint
/**
* Generate sample data used for Linear Data. This class generates
@@ -73,7 +72,7 @@ object LinearDataGenerator {
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(2 * rnd.nextDouble - 1.0))
val y = x.map { xi =>
- (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
+ new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
}
y.zip(x).map(p => LabeledPoint(p._1, p._2))
}
@@ -86,7 +85,6 @@ object LinearDataGenerator {
* @param nexamples Number of examples that will be contained in the RDD.
* @param nfeatures Number of features to generate for each example.
* @param eps Epsilon factor by which examples are scaled.
- * @param weights Weights associated with the first weights.length features.
* @param nparts Number of partitions in the RDD. Default value is 2.
*
* @return RDD of LabeledPoint containing sample data.
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index d5f3f6b..348aba1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.spark.mllib.recommendation
+package org.apache.spark.mllib.util
import scala.util.Random
@@ -23,7 +23,6 @@ import org.jblas.DoubleMatrix
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
-import org.apache.spark.mllib.util.MLUtils
/**
* Generate RDD(s) containing data for Matrix Factorization.
@@ -31,9 +30,9 @@ import org.apache.spark.mllib.util.MLUtils
* This method samples training entries according to the oversampling factor
* 'trainSampFact', which is a multiplicative factor of the number of
* degrees of freedom of the matrix: rank*(m+n-rank).
-*
-* It optionally samples entries for a testing matrix using
-* 'testSampFact', the percentage of the number of training entries
+*
+* It optionally samples entries for a testing matrix using
+* 'testSampFact', the percentage of the number of training entries
* to use for testing.
*
* This method takes the following inputs:
@@ -73,7 +72,7 @@ object MFDataGenerator{
val A = DoubleMatrix.randn(m, rank)
val B = DoubleMatrix.randn(rank, n)
- val z = 1 / (scala.math.sqrt(scala.math.sqrt(rank)))
+ val z = 1 / scala.math.sqrt(scala.math.sqrt(rank))
A.mmuli(z)
B.mmuli(z)
val fullData = A.mmul(B)
@@ -91,7 +90,7 @@ object MFDataGenerator{
.map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
// optionally add gaussian noise
- if (noise) {
+ if (noise) {
trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
}
@@ -107,8 +106,8 @@ object MFDataGenerator{
.map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)
}
-
+
sc.stop()
-
+
}
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index d91b74c..64c6136 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -97,7 +97,7 @@ object MLUtils {
while (col < nfeatures) {
xColMean.put(col, xColSumsMap(col)._1 / nexamples)
val variance =
- (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / (nexamples)
+ (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / nexamples
xColSd.put(col, math.sqrt(variance))
col += 1
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
index 0702209..c96c94f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -56,7 +56,7 @@ object SVMDataGenerator {
val x = Array.fill[Double](nfeatures) {
rnd.nextDouble() * 2.0 - 1.0
}
- val yD = (new DoubleMatrix(1, x.length, x:_*)).dot(trueWeights) + rnd.nextGaussian() * 0.1
+ val yD = new DoubleMatrix(1, x.length, x: _*).dot(trueWeights) + rnd.nextGaussian() * 0.1
val y = if (yD < 0) 0.0 else 1.0
LabeledPoint(y, x)
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index 34c6729..f97eaf3 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -80,9 +80,9 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with Shoul
}
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
- val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
- (prediction != expected.label)
- }.size
+ val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+ prediction != expected.label
+ }
// At least 83% of the predictions should be on.
((input.length - numOffPredictions).toDouble / input.length) should be > 0.83
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 6a957e3..0f24fbb 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -18,7 +18,6 @@
package org.apache.spark.mllib.classification
import scala.util.Random
-import scala.math.signum
import scala.collection.JavaConversions._
import org.scalatest.BeforeAndAfterAll
@@ -50,7 +49,7 @@ object SVMSuite {
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
- val yD = (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) +
+ val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
intercept + 0.01 * rnd.nextGaussian()
if (yD < 0) 0.0 else 1.0
}
@@ -72,9 +71,9 @@ class SVMSuite extends FunSuite with BeforeAndAfterAll {
}
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
- val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
- (prediction != expected.label)
- }.size
+ val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+ prediction != expected.label
+ }
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index 94245f6..73657ca 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -17,15 +17,12 @@
package org.apache.spark.mllib.clustering
-import scala.util.Random
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
-import org.jblas._
class KMeansSuite extends FunSuite with BeforeAndAfterAll {
@transient private var sc: SparkContext = _
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index e683a90..4e8dbde 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -24,7 +24,6 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
import org.jblas._
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
index db980c7..0a6a9f7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -17,8 +17,6 @@
package org.apache.spark.mllib.regression
-import scala.collection.JavaConversions._
-import scala.util.Random
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
@@ -41,10 +39,10 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
}
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
- val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
- // A prediction is off if the prediction is more than 0.5 away from expected value.
- math.abs(prediction - expected.label) > 0.5
- }.size
+ val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
+ }
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
index ef500c7..dd5aa85 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
@@ -21,7 +21,6 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
import org.apache.spark.mllib.util.LinearDataGenerator
class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
@@ -37,10 +36,10 @@ class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
}
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
- val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
- // A prediction is off if the prediction is more than 0.5 away from expected value.
- math.abs(prediction - expected.label) > 0.5
- }.size
+ val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
+ }
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index c18092d..1d6a10b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -17,15 +17,12 @@
package org.apache.spark.mllib.regression
-import scala.collection.JavaConversions._
-import scala.util.Random
import org.jblas.DoubleMatrix
import org.scalatest.BeforeAndAfterAll
import org.scalatest.FunSuite
import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
import org.apache.spark.mllib.util.LinearDataGenerator
class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
[3/6] git commit: Indent two spaces
Posted by rx...@apache.org.
Indent two spaces
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/c2852cf4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/c2852cf4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/c2852cf4
Branch: refs/heads/master
Commit: c2852cf42e0fa851b6708b6886b0d78ac5b697a6
Parents: 12386b3
Author: Frank Dai <so...@gmail.com>
Authored: Tue Jan 14 14:59:01 2014 +0800
Committer: Frank Dai <so...@gmail.com>
Committed: Tue Jan 14 14:59:01 2014 +0800
----------------------------------------------------------------------
.../spark/mllib/classification/LogisticRegressionSuite.scala | 2 +-
.../scala/org/apache/spark/mllib/classification/SVMSuite.scala | 2 +-
.../scala/org/apache/spark/mllib/regression/LassoSuite.scala | 4 ++--
.../apache/spark/mllib/regression/LinearRegressionSuite.scala | 4 ++--
4 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c2852cf4/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index f97eaf3..02ede71 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -81,7 +81,7 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with Shoul
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
- prediction != expected.label
+ prediction != expected.label
}
// At least 83% of the predictions should be on.
((input.length - numOffPredictions).toDouble / input.length) should be > 0.83
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c2852cf4/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 0f24fbb..3357b86 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -72,7 +72,7 @@ class SVMSuite extends FunSuite with BeforeAndAfterAll {
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
- prediction != expected.label
+ prediction != expected.label
}
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c2852cf4/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
index 0a6a9f7..b2c8df9 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -40,8 +40,8 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
- // A prediction is off if the prediction is more than 0.5 away from expected value.
- math.abs(prediction - expected.label) > 0.5
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
}
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/c2852cf4/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
index dd5aa85..406afba 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
@@ -37,8 +37,8 @@ class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
- // A prediction is off if the prediction is more than 0.5 away from expected value.
- math.abs(prediction - expected.label) > 0.5
+ // A prediction is off if the prediction is more than 0.5 away from expected value.
+ math.abs(prediction - expected.label) > 0.5
}
// At least 80% of the predictions should be on.
assert(numOffPredictions < input.length / 5)
[4/6] git commit: Merge remote-tracking branch 'upstream/master' into
code-style
Posted by rx...@apache.org.
Merge remote-tracking branch 'upstream/master' into code-style
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/a3da468d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/a3da468d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/a3da468d
Branch: refs/heads/master
Commit: a3da468d8b99565a966745e09830eaa768a9c267
Parents: c2852cf fdaabdc
Author: Frank Dai <so...@gmail.com>
Authored: Tue Jan 14 15:29:17 2014 +0800
Committer: Frank Dai <so...@gmail.com>
Committed: Tue Jan 14 15:29:17 2014 +0800
----------------------------------------------------------------------
bin/compute-classpath.sh | 2 +
.../scala/org/apache/spark/Accumulators.scala | 4 +-
.../scala/org/apache/spark/Aggregator.scala | 14 +-
.../scala/org/apache/spark/FutureAction.scala | 8 +-
.../apache/spark/InterruptibleIterator.scala | 2 +-
.../main/scala/org/apache/spark/Logging.scala | 2 +-
.../org/apache/spark/broadcast/Broadcast.scala | 1 +
.../spark/broadcast/BroadcastFactory.scala | 2 +-
.../apache/spark/broadcast/HttpBroadcast.scala | 5 +-
.../spark/broadcast/TorrentBroadcast.scala | 6 +-
.../scala/org/apache/spark/deploy/Client.scala | 3 +-
.../spark/deploy/worker/CommandUtils.scala | 3 +-
.../org/apache/spark/executor/Executor.scala | 2 +-
.../org/apache/spark/executor/TaskMetrics.scala | 10 +
.../main/scala/org/apache/spark/package.scala | 3 +
.../org/apache/spark/rdd/CoGroupedRDD.scala | 3 +
.../scala/org/apache/spark/rdd/HadoopRDD.scala | 2 +-
.../org/apache/spark/rdd/PairRDDFunctions.scala | 12 +-
.../scala/org/apache/spark/rdd/PipedRDD.scala | 3 +-
.../main/scala/org/apache/spark/rdd/RDD.scala | 5 +
.../apache/spark/scheduler/SparkListener.scala | 13 +-
.../spark/scheduler/TaskSchedulerImpl.scala | 5 +-
.../org/apache/spark/storage/BlockManager.scala | 3 +
.../spark/storage/BlockObjectWriter.scala | 4 +-
.../org/apache/spark/storage/StorageLevel.scala | 4 +
.../apache/spark/ui/jobs/ExecutorSummary.scala | 2 +
.../apache/spark/ui/jobs/ExecutorTable.scala | 4 +
.../spark/ui/jobs/JobProgressListener.scala | 14 +
.../org/apache/spark/ui/jobs/StagePage.scala | 53 +-
.../apache/spark/util/CompletionIterator.scala | 11 +-
.../org/apache/spark/util/MetadataCleaner.scala | 8 +-
.../spark/util/RateLimitedOutputStream.scala | 79 --
.../apache/spark/util/collection/BitSet.scala | 87 +-
.../util/collection/ExternalAppendOnlyMap.scala | 72 +-
.../spark/util/collection/OpenHashSet.scala | 23 +-
.../util/RateLimitedOutputStreamSuite.scala | 40 -
.../collection/ExternalAppendOnlyMapSuite.scala | 77 +-
docs/_config.yml | 2 +-
docs/_layouts/global.html | 8 +-
docs/_plugins/copy_api_dirs.rb | 2 +-
docs/api.md | 1 +
docs/bagel-programming-guide.md | 10 +-
docs/configuration.md | 11 +-
docs/graphx-programming-guide.md | 1003 ++++++++++++++++++
docs/img/data_parallel_vs_graph_parallel.png | Bin 0 -> 432725 bytes
docs/img/edge-cut.png | Bin 0 -> 12563 bytes
docs/img/edge_cut_vs_vertex_cut.png | Bin 0 -> 79745 bytes
docs/img/graph_analytics_pipeline.png | Bin 0 -> 427220 bytes
docs/img/graph_parallel.png | Bin 0 -> 92288 bytes
docs/img/graphx_figures.pptx | Bin 0 -> 1123363 bytes
docs/img/graphx_logo.png | Bin 0 -> 40324 bytes
docs/img/graphx_performance_comparison.png | Bin 0 -> 166343 bytes
docs/img/property_graph.png | Bin 0 -> 225151 bytes
docs/img/tables_and_graphs.png | Bin 0 -> 166265 bytes
docs/img/triplet.png | Bin 0 -> 31489 bytes
docs/img/vertex-cut.png | Bin 0 -> 12246 bytes
docs/img/vertex_routing_edge_tables.png | Bin 0 -> 570007 bytes
docs/index.md | 4 +-
docs/mllib-guide.md | 19 +-
docs/python-programming-guide.md | 8 +-
.../examples/graphx/LiveJournalPageRank.scala | 49 +
graphx/data/followers.txt | 8 +
graphx/data/users.txt | 7 +
graphx/pom.xml | 67 ++
.../scala/org/apache/spark/graphx/Edge.scala | 45 +
.../org/apache/spark/graphx/EdgeDirection.scala | 44 +
.../scala/org/apache/spark/graphx/EdgeRDD.scala | 102 ++
.../org/apache/spark/graphx/EdgeTriplet.scala | 49 +
.../scala/org/apache/spark/graphx/Graph.scala | 405 +++++++
.../spark/graphx/GraphKryoRegistrator.scala | 31 +
.../org/apache/spark/graphx/GraphLoader.scala | 72 ++
.../org/apache/spark/graphx/GraphOps.scala | 301 ++++++
.../apache/spark/graphx/PartitionStrategy.scala | 103 ++
.../scala/org/apache/spark/graphx/Pregel.scala | 139 +++
.../org/apache/spark/graphx/VertexRDD.scala | 347 ++++++
.../spark/graphx/impl/EdgePartition.scala | 220 ++++
.../graphx/impl/EdgePartitionBuilder.scala | 45 +
.../spark/graphx/impl/EdgeTripletIterator.scala | 42 +
.../apache/spark/graphx/impl/GraphImpl.scala | 379 +++++++
.../spark/graphx/impl/MessageToPartition.scala | 98 ++
.../graphx/impl/ReplicatedVertexView.scala | 195 ++++
.../apache/spark/graphx/impl/RoutingTable.scala | 65 ++
.../apache/spark/graphx/impl/Serializers.scala | 395 +++++++
.../spark/graphx/impl/VertexPartition.scala | 261 +++++
.../org/apache/spark/graphx/impl/package.scala | 7 +
.../org/apache/spark/graphx/lib/Analytics.scala | 136 +++
.../spark/graphx/lib/ConnectedComponents.scala | 38 +
.../org/apache/spark/graphx/lib/PageRank.scala | 147 +++
.../apache/spark/graphx/lib/SVDPlusPlus.scala | 138 +++
.../lib/StronglyConnectedComponents.scala | 94 ++
.../apache/spark/graphx/lib/TriangleCount.scala | 76 ++
.../scala/org/apache/spark/graphx/package.scala | 18 +
.../spark/graphx/util/BytecodeUtils.scala | 117 ++
.../spark/graphx/util/GraphGenerators.scala | 218 ++++
.../collection/PrimitiveKeyOpenHashMap.scala | 153 +++
graphx/src/test/resources/log4j.properties | 28 +
.../org/apache/spark/graphx/GraphOpsSuite.scala | 66 ++
.../org/apache/spark/graphx/GraphSuite.scala | 273 +++++
.../apache/spark/graphx/LocalSparkContext.scala | 28 +
.../org/apache/spark/graphx/PregelSuite.scala | 41 +
.../apache/spark/graphx/SerializerSuite.scala | 183 ++++
.../apache/spark/graphx/VertexRDDSuite.scala | 85 ++
.../spark/graphx/impl/EdgePartitionSuite.scala | 76 ++
.../graphx/impl/VertexPartitionSuite.scala | 113 ++
.../graphx/lib/ConnectedComponentsSuite.scala | 113 ++
.../apache/spark/graphx/lib/PageRankSuite.scala | 119 +++
.../spark/graphx/lib/SVDPlusPlusSuite.scala | 31 +
.../lib/StronglyConnectedComponentsSuite.scala | 57 +
.../spark/graphx/lib/TriangleCountSuite.scala | 70 ++
.../spark/graphx/util/BytecodeUtilsSuite.scala | 93 ++
mllib/data/sample_naive_bayes_data.txt | 6 +
.../spark/mllib/api/python/PythonMLLibAPI.scala | 17 +
.../classification/LogisticRegression.scala | 4 +-
.../spark/mllib/classification/NaiveBayes.scala | 65 +-
.../apache/spark/mllib/classification/SVM.scala | 2 +
.../spark/mllib/regression/LabeledPoint.scala | 6 +-
.../apache/spark/mllib/regression/Lasso.scala | 4 +-
.../mllib/regression/LinearRegression.scala | 2 +
.../mllib/regression/RidgeRegression.scala | 2 +
.../classification/JavaNaiveBayesSuite.java | 72 ++
pom.xml | 5 +-
project/SparkBuild.scala | 21 +-
python/pyspark/mllib/_common.py | 2 +-
python/pyspark/mllib/classification.py | 77 +-
python/pyspark/mllib/clustering.py | 11 +-
python/pyspark/mllib/recommendation.py | 10 +-
python/pyspark/mllib/regression.py | 35 +-
python/pyspark/worker.py | 4 +
python/run-tests | 5 +
.../streaming/api/java/JavaDStreamLike.scala | 3 +-
.../spark/streaming/dstream/DStream.scala | 60 +-
.../dstream/DStreamCheckpointData.scala | 2 +-
.../streaming/dstream/FileInputDStream.scala | 82 +-
.../util/RateLimitedOutputStream.scala | 79 ++
.../spark/streaming/util/RawTextSender.scala | 13 +-
.../spark/streaming/BasicOperationsSuite.scala | 72 +-
.../util/RateLimitedOutputStreamSuite.scala | 40 +
137 files changed, 7959 insertions(+), 388 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a3da468d/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a3da468d/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a3da468d/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/a3da468d/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
----------------------------------------------------------------------
[5/6] git commit: Added parentheses because getDouble() also has
a side effect
Posted by rx...@apache.org.
Added parentheses because getDouble() also has a side effect
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/57fcfc75
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/57fcfc75
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/57fcfc75
Branch: refs/heads/master
Commit: 57fcfc75b3583eb99564fc0d1bb5f49aea53f684
Parents: a3da468
Author: Frank Dai <so...@gmail.com>
Authored: Tue Jan 14 18:56:11 2014 +0800
Committer: Frank Dai <so...@gmail.com>
Committed: Tue Jan 14 18:56:11 2014 +0800
----------------------------------------------------------------------
.../scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/57fcfc75/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index f3656f6..efc0eb9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -217,7 +217,7 @@ class PythonMLLibAPI extends Serializable {
bb.order(ByteOrder.nativeOrder())
val user = bb.getInt()
val product = bb.getInt()
- val rating = bb.getDouble
+ val rating = bb.getDouble()
new Rating(user, product, rating)
}
[2/6] git commit: Since getLong() and getInt() have side effects,
restore their parentheses, and remove an empty line
Posted by rx...@apache.org.
Since getLong() and getInt() have side effects, restore their parentheses, and remove an empty line
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/12386b3e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/12386b3e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/12386b3e
Branch: refs/heads/master
Commit: 12386b3eea5db7be002b4ba620f3e242bb8ef332
Parents: 0d94d74
Author: Frank Dai <so...@gmail.com>
Authored: Tue Jan 14 14:53:10 2014 +0800
Committer: Frank Dai <so...@gmail.com>
Committed: Tue Jan 14 14:53:10 2014 +0800
----------------------------------------------------------------------
.../spark/mllib/api/python/PythonMLLibAPI.scala | 18 +++++++++---------
.../spark/mllib/clustering/KMeansModel.scala | 1 -
2 files changed, 9 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/12386b3e/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 9ec6019..8520756 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -36,11 +36,11 @@ class PythonMLLibAPI extends Serializable {
}
val bb = ByteBuffer.wrap(bytes)
bb.order(ByteOrder.nativeOrder())
- val magic = bb.getLong
+ val magic = bb.getLong()
if (magic != 1) {
throw new IllegalArgumentException("Magic " + magic + " is wrong.")
}
- val length = bb.getLong
+ val length = bb.getLong()
if (packetLength != 16 + 8 * length) {
throw new IllegalArgumentException("Length " + length + " is wrong.")
}
@@ -69,12 +69,12 @@ class PythonMLLibAPI extends Serializable {
}
val bb = ByteBuffer.wrap(bytes)
bb.order(ByteOrder.nativeOrder())
- val magic = bb.getLong
+ val magic = bb.getLong()
if (magic != 2) {
throw new IllegalArgumentException("Magic " + magic + " is wrong.")
}
- val rows = bb.getLong
- val cols = bb.getLong
+ val rows = bb.getLong()
+ val cols = bb.getLong()
if (packetLength != 24 + 8 * rows * cols) {
throw new IllegalArgumentException("Size " + rows + "x" + cols + " is wrong.")
}
@@ -198,8 +198,8 @@ class PythonMLLibAPI extends Serializable {
private def unpackRating(ratingBytes: Array[Byte]): Rating = {
val bb = ByteBuffer.wrap(ratingBytes)
bb.order(ByteOrder.nativeOrder())
- val user = bb.getInt
- val product = bb.getInt
+ val user = bb.getInt()
+ val product = bb.getInt()
val rating = bb.getDouble
new Rating(user, product, rating)
}
@@ -208,8 +208,8 @@ class PythonMLLibAPI extends Serializable {
private[spark] def unpackTuple(tupleBytes: Array[Byte]): (Int, Int) = {
val bb = ByteBuffer.wrap(tupleBytes)
bb.order(ByteOrder.nativeOrder())
- val v1 = bb.getInt
- val v2 = bb.getInt
+ val v1 = bb.getInt()
+ val v2 = bb.getInt()
(v1, v2)
}
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/12386b3e/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index f770707..980be93 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -38,6 +38,5 @@ class KMeansModel(val clusterCenters: Array[Array[Double]]) extends Serializable
*/
def computeCost(data: RDD[Array[Double]]): Double = {
data.map(p => KMeans.pointCost(clusterCenters, p)).sum()
-
}
}
[6/6] git commit: Merge pull request #414 from soulmachine/code-style
Posted by rx...@apache.org.
Merge pull request #414 from soulmachine/code-style
Code clean up for mllib
* Removed unnecessary parentheses
* Removed unused imports
* Simplified `filter...size()` to `count ...`
* Removed comments for obsolete parameters
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/84595ea3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/84595ea3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/84595ea3
Branch: refs/heads/master
Commit: 84595ea3e25d2f9578b3de34704da14eb02330fa
Parents: 0675ca5 57fcfc7
Author: Reynold Xin <rx...@apache.org>
Authored: Wed Jan 15 20:15:29 2014 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Wed Jan 15 20:15:29 2014 -0800
----------------------------------------------------------------------
.../spark/mllib/api/python/PythonMLLibAPI.scala | 4 +---
.../apache/spark/mllib/classification/SVM.scala | 2 --
.../spark/mllib/clustering/KMeansModel.scala | 4 +---
.../spark/mllib/regression/LinearRegression.scala | 2 +-
.../spark/mllib/regression/RidgeRegression.scala | 6 +++---
.../spark/mllib/util/LinearDataGenerator.scala | 4 +---
.../apache/spark/mllib/util/MFDataGenerator.scala | 17 ++++++++---------
.../org/apache/spark/mllib/util/MLUtils.scala | 2 +-
.../apache/spark/mllib/util/SVMDataGenerator.scala | 2 +-
.../classification/LogisticRegressionSuite.scala | 6 +++---
.../spark/mllib/classification/SVMSuite.scala | 9 ++++-----
.../spark/mllib/clustering/KMeansSuite.scala | 3 ---
.../spark/mllib/recommendation/ALSSuite.scala | 1 -
.../apache/spark/mllib/regression/LassoSuite.scala | 6 ++----
.../mllib/regression/LinearRegressionSuite.scala | 5 ++---
.../mllib/regression/RidgeRegressionSuite.scala | 3 ---
16 files changed, 28 insertions(+), 48 deletions(-)
----------------------------------------------------------------------