You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2013/09/10 21:32:35 UTC
[30/50] git commit: Small tweaks to MLlib docs

Small tweaks to MLlib docs


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/7a5c4b64
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/7a5c4b64
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/7a5c4b64

Branch: refs/remotes/origin/branch-0.8
Commit: 7a5c4b647bdd12d2a6d6285d0349680f1a848b01
Parents: 7d3204b
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Sun Sep 8 21:47:24 2013 -0700
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Sun Sep 8 21:47:24 2013 -0700

----------------------------------------------------------------------
 docs/mllib-guide.md                               | 18 ++++++++----------
 .../apache/spark/mllib/classification/SVM.scala   | 13 +++++++------
 .../spark/mllib/regression/LinearRegression.scala |  4 ++--
 3 files changed, 17 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7a5c4b64/docs/mllib-guide.md
----------------------------------------------------------------------
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md
index 1a62999..1583ce4 100644
--- a/docs/mllib-guide.md
+++ b/docs/mllib-guide.md
@@ -4,7 +4,7 @@ title: Machine Learning Library (MLlib)
 ---
 
 MLlib is a Spark implementation of some common machine learning (ML)
-functionality, as well associated unit tests and data generators.  MLlib
+functionality, as well as associated tests and data generators.  MLlib
 currently supports four common types of machine learning problem settings,
 namely, binary classification, regression, clustering and collaborative
 filtering, as well as an underlying gradient descent optimization primitive.
@@ -44,22 +44,20 @@ import org.apache.spark.mllib.regression.LabeledPoint
 
 // Load and parse the data file
 val data = sc.textFile("mllib/data/sample_svm_data.txt")
-val parsedData = data.map(line => {
+val parsedData = data.map { line =>
   val parts = line.split(' ')
   LabeledPoint(parts(0).toDouble, parts.tail.map(x => x.toDouble).toArray)
-})
+}
 
 // Run training algorithm
 val numIterations = 20
-val model = SVMWithSGD.train(
-  parsedData,
-  numIterations)
+val model = SVMWithSGD.train(parsedData, numIterations)
  
 // Evaluate model on training examples and compute training error
-val labelAndPreds = parsedData.map(r => {
-  val prediction = model.predict(r.features)
-  (r.label, prediction)
-})
+val labelAndPreds = parsedData.map { point =>
+  val prediction = model.predict(point.features)
+  (point.label, prediction)
+}
 val trainErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / parsedData.count
 println("trainError = " + trainErr)
 {% endhighlight %}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7a5c4b64/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3511e24..3b8f855 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -29,7 +29,7 @@ import org.apache.spark.mllib.util.DataValidators
 import org.jblas.DoubleMatrix
 
 /**
- * Model built using SVM.
+ * Model for Support Vector Machines (SVMs).
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
@@ -48,8 +48,8 @@ class SVMModel(
 }
 
 /**
- * Train an SVM using Stochastic Gradient Descent.
- * NOTE: Labels used in SVM should be {0, 1}
+ * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.
+ * NOTE: Labels used in SVM should be {0, 1}.
  */
 class SVMWithSGD private (
     var stepSize: Double,
@@ -79,7 +79,7 @@ class SVMWithSGD private (
 }
 
 /**
- * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}
+ * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}.
  */
 object SVMWithSGD {
 
@@ -88,14 +88,15 @@ object SVMWithSGD {
    * of iterations of gradient descent using the specified step size. Each iteration uses
    * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
    * gradient descent are initialized using the initial weights provided.
-   * NOTE: Labels used in SVM should be {0, 1}
+   *
+   * NOTE: Labels used in SVM should be {0, 1}.
    *
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @param initialWeights Initial set of weights to be used. Array should be equal in size to 
+   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    */
   def train(

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/7a5c4b64/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index ae95ea2..597d55e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -43,7 +43,7 @@ class LinearRegressionModel(
 }
 
 /**
- * Train a regression model with no regularization using Stochastic Gradient Descent.
+ * Train a linear regression model with no regularization using Stochastic Gradient Descent.
  */
 class LinearRegressionWithSGD private (
     var stepSize: Double,
@@ -83,7 +83,7 @@ object LinearRegressionWithSGD {
    * @param numIterations Number of iterations of gradient descent to run.
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @param initialWeights Initial set of weights to be used. Array should be equal in size to 
+   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    */
   def train(