You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by jk...@apache.org on 2016/04/29 07:44:21 UTC

spark git commit: [SPARK-14829][MLLIB] Deprecate GLM APIs using SGD

Repository: spark
Updated Branches:
  refs/heads/master 769a909d1 -> cabd54d93


[SPARK-14829][MLLIB] Deprecate GLM APIs using SGD

## What changes were proposed in this pull request?
According to the [SPARK-14829](https://issues.apache.org/jira/browse/SPARK-14829), deprecate API of LogisticRegression and LinearRegression using SGD

## How was this patch tested?
manual tests

Author: Zheng RuiFeng <ru...@foxmail.com>

Closes #12596 from zhengruifeng/deprecate_sgd.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cabd54d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cabd54d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cabd54d9

Branch: refs/heads/master
Commit: cabd54d93162a3f2a0cc7ed76fb46d8224edab94
Parents: 769a909
Author: Zheng RuiFeng <ru...@foxmail.com>
Authored: Thu Apr 28 22:44:14 2016 -0700
Committer: Joseph K. Bradley <jo...@databricks.com>
Committed: Thu Apr 28 22:44:14 2016 -0700

----------------------------------------------------------------------
 .../mllib/classification/LogisticRegression.scala |  2 ++
 .../org/apache/spark/mllib/regression/Lasso.scala |  4 ++++
 .../spark/mllib/regression/LinearRegression.scala |  2 ++
 .../spark/mllib/regression/RidgeRegression.scala  |  4 ++++
 python/pyspark/mllib/classification.py            |  7 +++++++
 python/pyspark/mllib/regression.py                | 18 ++++++++++++++++++
 6 files changed, 37 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index f10570e..1d25a58 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -206,6 +206,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
  * Using [[LogisticRegressionWithLBFGS]] is recommended over this.
  */
 @Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
 class LogisticRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -240,6 +241,7 @@ class LogisticRegressionWithSGD private[mllib] (
  * NOTE: Labels used in Logistic Regression should be {0, 1}
  */
 @Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
 object LogisticRegressionWithSGD {
   // NOTE(shivaram): We use multiple train methods instead of default arguments to support
   // Java programs.

http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index d55e5df..ef8c80f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -85,6 +85,8 @@ object LassoModel extends Loader[LassoModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+  "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 class LassoWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -118,6 +120,8 @@ class LassoWithSGD private (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+  "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 object LassoWithSGD {
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index e754e74..9e9d98b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -86,6 +86,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
 class LinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -119,6 +120,7 @@ class LinearRegressionWithSGD private[mllib] (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
 object LinearRegressionWithSGD {
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 0a44ff5..512fb9a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -86,6 +86,8 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+  "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 class RidgeRegressionWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -119,6 +121,8 @@ class RidgeRegressionWithSGD private (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+  "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 object RidgeRegressionWithSGD {
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/python/pyspark/mllib/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 57106f8..fe5b684 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -16,6 +16,7 @@
 #
 
 from math import exp
+import warnings
 
 import numpy
 from numpy import array
@@ -266,6 +267,8 @@ class LogisticRegressionModel(LinearClassificationModel):
 class LogisticRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or
+            LogisticRegressionWithLBFGS.
     """
     @classmethod
     @since('0.9.0')
@@ -312,6 +315,10 @@ class LogisticRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
+            "LogisticRegressionWithLBFGS.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
                                  float(step), float(miniBatchFraction), i, float(regParam), regType,

http://git-wip-us.apache.org/repos/asf/spark/blob/cabd54d9/python/pyspark/mllib/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 3b77a62..639c5ea 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 from numpy import array
+import warnings
 
 from pyspark import RDD, since
 from pyspark.streaming.dstream import DStream
@@ -221,6 +222,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
 class LinearRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -276,6 +278,8 @@ class LinearRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn("Deprecated in 2.0.0. Use ml.regression.LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
                                  float(step), float(miniBatchFraction), i, float(regParam),
@@ -366,6 +370,8 @@ class LassoModel(LinearRegressionModelBase):
 class LassoWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0.
+            Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -413,6 +419,10 @@ class LassoWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
+            "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
                                  float(regParam), float(miniBatchFraction), i, bool(intercept),
@@ -503,6 +513,9 @@ class RidgeRegressionModel(LinearRegressionModelBase):
 class RidgeRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0.
+            Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for
+            LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -550,6 +563,11 @@ class RidgeRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
+            "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
+            "LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
                                  float(regParam), float(miniBatchFraction), i, bool(intercept),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org