You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/06/03 08:15:46 UTC
spark git commit: [SPARK-8043] [MLLIB] [DOC] update NaiveBayes and SVM examples in doc

Repository: spark
Updated Branches:
  refs/heads/master ccaa82329 -> 43adbd561


[SPARK-8043] [MLLIB] [DOC] update NaiveBayes and SVM examples in doc

jira: https://issues.apache.org/jira/browse/SPARK-8043

I found some issues during testing the save/load examples in markdown Documents, as a part of 1.4 QA plan

Author: Yuhao Yang <hh...@gmail.com>

Closes #6584 from hhbyyh/naiveDocExample and squashes the following commits:

a01a206 [Yuhao Yang] fix for Gaussian mixture
2fb8b96 [Yuhao Yang] update NaiveBayes and SVM examples in doc


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/43adbd56
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/43adbd56
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/43adbd56

Branch: refs/heads/master
Commit: 43adbd56114ba80039a23909b0a30d393eaacc62
Parents: ccaa823
Author: Yuhao Yang <hh...@gmail.com>
Authored: Tue Jun 2 23:15:38 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Jun 2 23:15:38 2015 -0700

----------------------------------------------------------------------
 docs/mllib-clustering.md     |  6 +++---
 docs/mllib-linear-methods.md | 24 ++++++++++--------------
 docs/mllib-naive-bayes.md    |  2 +-
 3 files changed, 14 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/43adbd56/docs/mllib-clustering.md
----------------------------------------------------------------------
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index dac22f7..1b08896 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -249,11 +249,11 @@ public class GaussianMixtureExample {
     GaussianMixtureModel gmm = new GaussianMixture().setK(2).run(parsedData.rdd());
 
     // Save and load GaussianMixtureModel
-    gmm.save(sc, "myGMMModel")
-    GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc, "myGMMModel")
+    gmm.save(sc.sc(), "myGMMModel");
+    GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc.sc(), "myGMMModel");
     // Output the parameters of the mixture model
     for(int j=0; j<gmm.k(); j++) {
-        System.out.println("weight=%f\nmu=%s\nsigma=\n%s\n",
+        System.out.printf("weight=%f\nmu=%s\nsigma=\n%s\n",
             gmm.weights()[j], gmm.gaussians()[j].mu(), gmm.gaussians()[j].sigma());
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/43adbd56/docs/mllib-linear-methods.md
----------------------------------------------------------------------
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index 8029edc..3dc8cc9 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -163,11 +163,8 @@ object, and make predictions with the resulting model to compute the training
 error.
 
 {% highlight scala %}
-import org.apache.spark.SparkContext
 import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
 import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.MLUtils
 
 // Load training data in LIBSVM format.
@@ -231,15 +228,13 @@ calling `.rdd()` on your `JavaRDD` object. A self-contained application example
 that is equivalent to the provided example in Scala is given bellow:
 
 {% highlight java %}
-import java.util.Random;
-
 import scala.Tuple2;
 
 import org.apache.spark.api.java.*;
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.mllib.classification.*;
 import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
-import org.apache.spark.mllib.linalg.Vector;
+
 import org.apache.spark.mllib.regression.LabeledPoint;
 import org.apache.spark.mllib.util.MLUtils;
 import org.apache.spark.SparkConf;
@@ -282,8 +277,8 @@ public class SVMClassifier {
     System.out.println("Area under ROC = " + auROC);
 
     // Save and load model
-    model.save(sc.sc(), "myModelPath");
-    SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath");
+    model.save(sc, "myModelPath");
+    SVMModel sameModel = SVMModel.load(sc, "myModelPath");
   }
 }
 {% endhighlight %}
@@ -315,15 +310,12 @@ a dependency.
 </div>
 
 <div data-lang="python" markdown="1">
-The following example shows how to load a sample dataset, build Logistic Regression model,
+The following example shows how to load a sample dataset, build SVM model,
 and make predictions with the resulting model to compute the training error.
 
-Note that the Python API does not yet support model save/load but will in the future.
-
 {% highlight python %}
-from pyspark.mllib.classification import LogisticRegressionWithSGD
+from pyspark.mllib.classification import SVMWithSGD, SVMModel
 from pyspark.mllib.regression import LabeledPoint
-from numpy import array
 
 # Load and parse the data
 def parsePoint(line):
@@ -334,12 +326,16 @@ data = sc.textFile("data/mllib/sample_svm_data.txt")
 parsedData = data.map(parsePoint)
 
 # Build the model
-model = LogisticRegressionWithSGD.train(parsedData)
+model = SVMWithSGD.train(parsedData, iterations=100)
 
 # Evaluating the model on training data
 labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
 trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
 print("Training Error = " + str(trainErr))
+
+# Save and load model
+model.save(sc, "myModelPath")
+sameModel = SVMModel.load(sc, "myModelPath")
 {% endhighlight %}
 </div>
 </div>

http://git-wip-us.apache.org/repos/asf/spark/blob/43adbd56/docs/mllib-naive-bayes.md
----------------------------------------------------------------------
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index acdcc37..bf6d124 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -53,7 +53,7 @@ val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
 val training = splits(0)
 val test = splits(1)
 
-val model = NaiveBayes.train(training, lambda = 1.0, model = "multinomial")
+val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
 
 val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
 val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org