You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yl...@apache.org on 2017/07/05 10:41:05 UTC
spark git commit: [SPARK-21310][ML][PYSPARK] Expose offset in PySpark
Repository: spark
Updated Branches:
refs/heads/master a38643256 -> 4852b7d44
[SPARK-21310][ML][PYSPARK] Expose offset in PySpark
## What changes were proposed in this pull request?
Add the `offsetCol` param to `GeneralizedLinearRegression` (GLM) in PySpark, as in #16699.
## How was this patch tested?
A Python unit test, `test_offset`, was added to `python/pyspark/ml/tests.py`.
Author: actuaryzhang <ac...@gmail.com>
Closes #18534 from actuaryzhang/pythonOffset.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4852b7d4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4852b7d4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4852b7d4
Branch: refs/heads/master
Commit: 4852b7d447e872079c2c81428354adc825a87b27
Parents: a386432
Author: actuaryzhang <ac...@gmail.com>
Authored: Wed Jul 5 18:41:00 2017 +0800
Committer: Yanbo Liang <yb...@gmail.com>
Committed: Wed Jul 5 18:41:00 2017 +0800
----------------------------------------------------------------------
python/pyspark/ml/regression.py | 25 +++++++++++++++++++++----
python/pyspark/ml/tests.py | 14 ++++++++++++++
2 files changed, 35 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4852b7d4/python/pyspark/ml/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 84d8433..f0ff7a5 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -1376,17 +1376,20 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
typeConverter=TypeConverters.toFloat)
solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " +
"options: irls.", typeConverter=TypeConverters.toString)
+ offsetCol = Param(Params._dummy(), "offsetCol", "The offset column name. If this is not set " +
+ "or empty, we treat all instance offsets as 0.0",
+ typeConverter=TypeConverters.toString)
@keyword_only
def __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction",
family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6,
regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None,
- variancePower=0.0, linkPower=None):
+ variancePower=0.0, linkPower=None, offsetCol=None):
"""
__init__(self, labelCol="label", featuresCol="features", predictionCol="prediction", \
family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \
regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \
- variancePower=0.0, linkPower=None)
+ variancePower=0.0, linkPower=None, offsetCol=None)
"""
super(GeneralizedLinearRegression, self).__init__()
self._java_obj = self._new_java_obj(
@@ -1402,12 +1405,12 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
def setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction",
family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6,
regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None,
- variancePower=0.0, linkPower=None):
+ variancePower=0.0, linkPower=None, offsetCol=None):
"""
setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction", \
family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \
regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \
- variancePower=0.0, linkPower=None)
+ variancePower=0.0, linkPower=None, offsetCol=None)
Sets params for generalized linear regression.
"""
kwargs = self._input_kwargs
@@ -1486,6 +1489,20 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha
"""
return self.getOrDefault(self.linkPower)
+ @since("2.3.0")
+ def setOffsetCol(self, value):
+ """
+ Sets the value of :py:attr:`offsetCol`.
+ """
+ return self._set(offsetCol=value)
+
+ @since("2.3.0")
+ def getOffsetCol(self):
+ """
+ Gets the value of offsetCol or its default value.
+ """
+ return self.getOrDefault(self.offsetCol)
+
class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable,
JavaMLReadable):
http://git-wip-us.apache.org/repos/asf/spark/blob/4852b7d4/python/pyspark/ml/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index ffb8b0a..7870047 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1291,6 +1291,20 @@ class GeneralizedLinearRegressionTest(SparkSessionTestCase):
self.assertTrue(np.allclose(model2.coefficients.toArray(), [-0.6667, 0.5], atol=1E-4))
self.assertTrue(np.isclose(model2.intercept, 0.6667, atol=1E-4))
+ def test_offset(self):
+
+ df = self.spark.createDataFrame(
+ [(0.2, 1.0, 2.0, Vectors.dense(0.0, 5.0)),
+ (0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)),
+ (0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)),
+ (0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0))], ["label", "weight", "offset", "features"])
+
+ glr = GeneralizedLinearRegression(family="poisson", weightCol="weight", offsetCol="offset")
+ model = glr.fit(df)
+ self.assertTrue(np.allclose(model.coefficients.toArray(), [0.664647, -0.3192581],
+ atol=1E-4))
+ self.assertTrue(np.isclose(model.intercept, -1.561613, atol=1E-4))
+
class FPGrowthTests(SparkSessionTestCase):
def setUp(self):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org