Posted to commits@spark.apache.org by me...@apache.org on 2015/06/25 17:13:22 UTC

spark git commit: [MINOR] [MLLIB] rename some functions of PythonMLLibAPI

Repository: spark
Updated Branches:
  refs/heads/master f9b397f54 -> 2519dcc33


[MINOR] [MLLIB] rename some functions of PythonMLLibAPI

Keep the naming conventions of PythonMLLibAPI consistent.
Only the following three functions are named differently from the others:
```scala
trainNaiveBayes
trainGaussianMixture
trainWord2Vec
```
So rename them to:
```scala
trainNaiveBayesModel
trainGaussianMixtureModel
trainWord2VecModel
```
This does not affect any users or public APIs; it only makes the code easier to understand for developers and code readers.
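
As a quick illustration (a hedged sketch, not part of this commit; the app name and toy data below are made up), the public PySpark entry points keep working exactly as before, because only the internal stub name passed to callMLlibFunc changes:
```python
# Minimal, hypothetical sketch: the rename only touches the internal stub
# names reached through callMLlibFunc; the user-facing call below is unchanged.
from pyspark import SparkContext
from pyspark.mllib.classification import NaiveBayes
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext(appName="naive-bayes-rename-demo")  # illustrative app name

# Tiny toy dataset, purely illustrative.
data = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(1.0, [1.0, 0.0]),
])

# After this commit, NaiveBayes.train() forwards to
# callMLlibFunc("trainNaiveBayesModel", ...) instead of
# callMLlibFunc("trainNaiveBayes", ...); callers do not notice the difference.
model = NaiveBayes.train(data, 1.0)
print(model.predict([1.0, 0.0]))

sc.stop()
```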

Author: Yanbo Liang <yb...@gmail.com>

Closes #7011 from yanboliang/py-mllib-api-rename and squashes the following commits:

771ffec [Yanbo Liang] rename some functions of PythonMLLibAPI


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2519dcc3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2519dcc3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2519dcc3

Branch: refs/heads/master
Commit: 2519dcc33bde3a6d341790d73b5d292ea7af961a
Parents: f9b397f
Author: Yanbo Liang <yb...@gmail.com>
Authored: Thu Jun 25 08:13:17 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Jun 25 08:13:17 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala     | 6 +++---
 python/pyspark/mllib/classification.py                         | 2 +-
 python/pyspark/mllib/clustering.py                             | 6 +++---
 python/pyspark/mllib/feature.py                                | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c4bea7c..b16903a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -278,7 +278,7 @@ private[python] class PythonMLLibAPI extends Serializable {
   /**
    * Java stub for NaiveBayes.train()
    */
-  def trainNaiveBayes(
+  def trainNaiveBayesModel(
       data: JavaRDD[LabeledPoint],
       lambda: Double): JList[Object] = {
     val model = NaiveBayes.train(data.rdd, lambda)
@@ -346,7 +346,7 @@ private[python] class PythonMLLibAPI extends Serializable {
    * Java stub for Python mllib GaussianMixture.run()
    * Returns a list containing weights, mean and covariance of each mixture component.
    */
-  def trainGaussianMixture(
+  def trainGaussianMixtureModel(
       data: JavaRDD[Vector],
       k: Int,
       convergenceTol: Double,
@@ -553,7 +553,7 @@ private[python] class PythonMLLibAPI extends Serializable {
    * @param seed initial seed for random generator
    * @return A handle to java Word2VecModelWrapper instance at python side
    */
-  def trainWord2Vec(
+  def trainWord2VecModel(
       dataJRDD: JavaRDD[java.util.ArrayList[String]],
       vectorSize: Int,
       learningRate: Double,

http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 2698f10..735d45b 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -581,7 +581,7 @@ class NaiveBayes(object):
         first = data.first()
         if not isinstance(first, LabeledPoint):
             raise ValueError("`data` should be an RDD of LabeledPoint")
-        labels, pi, theta = callMLlibFunc("trainNaiveBayes", data, lambda_)
+        labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_)
         return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta))
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/clustering.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py
index e6ef729..8bc0654 100644
--- a/python/pyspark/mllib/clustering.py
+++ b/python/pyspark/mllib/clustering.py
@@ -265,9 +265,9 @@ class GaussianMixture(object):
             initialModelWeights = initialModel.weights
             initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)]
             initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)]
-        weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k,
-                                          convergenceTol, maxIterations, seed, initialModelWeights,
-                                          initialModelMu, initialModelSigma)
+        weight, mu, sigma = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector),
+                                          k, convergenceTol, maxIterations, seed,
+                                          initialModelWeights, initialModelMu, initialModelSigma)
         mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)]
         return GaussianMixtureModel(weight, mvg_obj)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2519dcc3/python/pyspark/mllib/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 334f5b8..f00bb93 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -549,7 +549,7 @@ class Word2Vec(object):
         """
         if not isinstance(data, RDD):
             raise TypeError("data should be an RDD of list of string")
-        jmodel = callMLlibFunc("trainWord2Vec", data, int(self.vectorSize),
+        jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
                                float(self.learningRate), int(self.numPartitions),
                                int(self.numIterations), int(self.seed),
                                int(self.minCount))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org