You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yl...@apache.org on 2017/08/01 13:34:39 UTC
spark git commit: [SPARK-21388][ML][PYSPARK] GBTs inherit from
HasStepSize & LinearSVC from HasThreshold
Repository: spark
Updated Branches:
refs/heads/master 5fd0294ff -> 253a07e43
[SPARK-21388][ML][PYSPARK] GBTs inherit from HasStepSize & LinearSVC from HasThreshold
## What changes were proposed in this pull request?
GBTs inherit from HasStepSize, and LinearSVC/Binarizer inherit from HasThreshold.
## How was this patch tested?
existing tests
Author: Zheng RuiFeng <ru...@foxmail.com>
Author: Ruifeng Zheng <ru...@foxmail.com>
Closes #18612 from zhengruifeng/override_HasXXX.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/253a07e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/253a07e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/253a07e4
Branch: refs/heads/master
Commit: 253a07e43a35f3494aa5e5ead9f4997c653325aa
Parents: 5fd0294
Author: Zheng RuiFeng <ru...@foxmail.com>
Authored: Tue Aug 1 21:34:26 2017 +0800
Committer: Yanbo Liang <yb...@gmail.com>
Committed: Tue Aug 1 21:34:26 2017 +0800
----------------------------------------------------------------------
.../spark/ml/classification/LinearSVC.scala | 7 ++-----
.../ml/classification/LogisticRegression.scala | 1 +
.../org/apache/spark/ml/feature/Word2Vec.scala | 1 -
.../ml/param/shared/SharedParamsCodeGen.scala | 6 +++---
.../spark/ml/param/shared/sharedParams.scala | 6 ++----
.../org/apache/spark/ml/tree/treeParams.scala | 7 ++-----
python/pyspark/ml/classification.py | 19 ++++++-------------
python/pyspark/ml/regression.py | 5 +++++
8 files changed, 21 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
index d6ed6a4..8d556de 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala
@@ -42,7 +42,7 @@ import org.apache.spark.sql.functions.{col, lit}
/** Params for linear SVM Classifier. */
private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam
with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol
- with HasAggregationDepth {
+ with HasAggregationDepth with HasThreshold {
/**
* Param for threshold in binary classification prediction.
@@ -53,11 +53,8 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
*
* @group param
*/
- final val threshold: DoubleParam = new DoubleParam(this, "threshold",
+ final override val threshold: DoubleParam = new DoubleParam(this, "threshold",
"threshold in binary classification prediction applied to rawPrediction")
-
- /** @group getParam */
- def getThreshold: Double = $(threshold)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 6bba7f9..21957d9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -366,6 +366,7 @@ class LogisticRegression @Since("1.2.0") (
@Since("1.5.0")
override def setThreshold(value: Double): this.type = super.setThreshold(value)
+ setDefault(threshold -> 0.5)
@Since("1.5.0")
override def getThreshold: Double = super.getThreshold
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index b6909b3..d4c8e4b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -19,7 +19,6 @@ package org.apache.spark.ml.feature
import org.apache.hadoop.fs.Path
-import org.apache.spark.SparkContext
import org.apache.spark.annotation.Since
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors, VectorUDT}
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index fd9b20e..1860fe8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -47,8 +47,8 @@ private[shared] object SharedParamsCodeGen {
Some("\"probability\"")),
ParamDesc[String]("varianceCol", "Column name for the biased sample variance of prediction"),
ParamDesc[Double]("threshold",
- "threshold in binary classification prediction, in range [0, 1]", Some("0.5"),
- isValid = "ParamValidators.inRange(0, 1)", finalMethods = false),
+ "threshold in binary classification prediction, in range [0, 1]",
+ isValid = "ParamValidators.inRange(0, 1)", finalMethods = false, finalFields = false),
ParamDesc[Array[Double]]("thresholds", "Thresholds in multi-class classification" +
" to adjust the probability of predicting each class." +
" Array must have length equal to the number of classes, with values > 0" +
@@ -77,7 +77,7 @@ private[shared] object SharedParamsCodeGen {
ParamDesc[Double]("tol", "the convergence tolerance for iterative algorithms (>= 0)",
isValid = "ParamValidators.gtEq(0)"),
ParamDesc[Double]("stepSize", "Step size to be used for each iteration of optimization (>" +
- " 0)", isValid = "ParamValidators.gt(0)"),
+ " 0)", isValid = "ParamValidators.gt(0)", finalFields = false),
ParamDesc[String]("weightCol", "weight column name. If this is not set or empty, we treat " +
"all instance weights as 1.0"),
ParamDesc[String]("solver", "the solver algorithm for optimization", finalFields = false),
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index a29b45c..545e45e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -162,9 +162,7 @@ private[ml] trait HasThreshold extends Params {
* Param for threshold in binary classification prediction, in range [0, 1].
* @group param
*/
- final val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction, in range [0, 1]", ParamValidators.inRange(0, 1))
-
- setDefault(threshold, 0.5)
+ val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction, in range [0, 1]", ParamValidators.inRange(0, 1))
/** @group getParam */
def getThreshold: Double = $(threshold)
@@ -352,7 +350,7 @@ private[ml] trait HasStepSize extends Params {
* Param for Step size to be used for each iteration of optimization (> 0).
* @group param
*/
- final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization (> 0)", ParamValidators.gt(0))
+ val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization (> 0)", ParamValidators.gt(0))
/** @group getParam */
final def getStepSize: Double = $(stepSize)
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 3fc3ac5..47079d9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -458,7 +458,7 @@ private[ml] trait RandomForestRegressorParams
*
* Note: Marked as private and DeveloperApi since this may be made public in the future.
*/
-private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
+private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter with HasStepSize {
/* TODO: Add this doc when we add this param. SPARK-7132
* Threshold for stopping early when runWithValidation is used.
@@ -484,13 +484,10 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
* (default = 0.1)
* @group param
*/
- final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size " +
+ final override val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size " +
"(a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of each estimator.",
ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))
- /** @group getParam */
- final def getStepSize: Double = $(stepSize)
-
/**
* @deprecated This method is deprecated and will be removed in 3.0.0.
* @group setParam
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/python/pyspark/ml/classification.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 4af6f71..ab1617b 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -63,7 +63,7 @@ class JavaClassificationModel(JavaPredictionModel):
@inherit_doc
class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization,
- HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable):
+ HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable):
"""
.. note:: Experimental
@@ -153,18 +153,6 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha
def _create_model(self, java_model):
return LinearSVCModel(java_model)
- def setThreshold(self, value):
- """
- Sets the value of :py:attr:`threshold`.
- """
- return self._set(threshold=value)
-
- def getThreshold(self):
- """
- Gets the value of threshold or its default value.
- """
- return self.getOrDefault(self.threshold)
-
class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
"""
@@ -1030,6 +1018,11 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
"Supported options: " + ", ".join(GBTParams.supportedLossTypes),
typeConverter=TypeConverters.toString)
+ stepSize = Param(Params._dummy(), "stepSize",
+ "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+ "the contribution of each estimator.",
+ typeConverter=TypeConverters.toFloat)
+
@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
http://git-wip-us.apache.org/repos/asf/spark/blob/253a07e4/python/pyspark/ml/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index f0ff7a5..2cc6234 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -1014,6 +1014,11 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
"Supported options: " + ", ".join(GBTParams.supportedLossTypes),
typeConverter=TypeConverters.toString)
+ stepSize = Param(Params._dummy(), "stepSize",
+ "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+ "the contribution of each estimator.",
+ typeConverter=TypeConverters.toFloat)
+
@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org