You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yl...@apache.org on 2016/11/16 10:46:41 UTC
spark git commit: [SPARK-18434][ML] Add missing ParamValidations for ML algos
Repository: spark
Updated Branches:
refs/heads/master 241e04bc0 -> c68f1a38a
[SPARK-18434][ML] Add missing ParamValidations for ML algos
## What changes were proposed in this pull request?
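Add missing parameter validation for ML algorithms: `ParamValidators` bounds on params in IDF, PCA, Word2Vec, IsotonicRegression, and the decision-tree params, plus a `require` check on the supported values in `LinearRegression.setSolver`.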
## How was this patch tested?
Existing tests; this commit changes only files under `src/main` and adds no new tests.
Author: Zheng RuiFeng <ru...@foxmail.com>
Closes #15881 from zhengruifeng/arg_checking.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c68f1a38
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c68f1a38
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c68f1a38
Branch: refs/heads/master
Commit: c68f1a38af67957ee28889667193da8f64bb4342
Parents: 241e04b
Author: Zheng RuiFeng <ru...@foxmail.com>
Authored: Wed Nov 16 02:46:27 2016 -0800
Committer: Yanbo Liang <yb...@gmail.com>
Committed: Wed Nov 16 02:46:27 2016 -0800
----------------------------------------------------------------------
.../main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++-
.../main/scala/org/apache/spark/ml/feature/PCA.scala | 3 ++-
.../scala/org/apache/spark/ml/feature/Word2Vec.scala | 13 ++++++++-----
.../spark/ml/regression/IsotonicRegression.scala | 3 ++-
.../apache/spark/ml/regression/LinearRegression.scala | 6 +++++-
.../scala/org/apache/spark/ml/tree/treeParams.scala | 4 +++-
6 files changed, 22 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 6386dd8..46a0730 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -44,7 +44,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
* @group param
*/
final val minDocFreq = new IntParam(
- this, "minDocFreq", "minimum number of documents in which a term should appear for filtering")
+ this, "minDocFreq", "minimum number of documents in which a term should appear for filtering" +
+ " (>= 0)", ParamValidators.gtEq(0))
setDefault(minDocFreq -> 0)
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 6b91348..444006f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -44,7 +44,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
* The number of principal components.
* @group param
*/
- final val k: IntParam = new IntParam(this, "k", "the number of principal components")
+ final val k: IntParam = new IntParam(this, "k", "the number of principal components (> 0)",
+ ParamValidators.gt(0))
/** @group getParam */
def getK: Int = $(k)
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index d53f3df..3ed08c9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -43,7 +43,8 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val vectorSize = new IntParam(
- this, "vectorSize", "the dimension of codes after transforming from words")
+ this, "vectorSize", "the dimension of codes after transforming from words (> 0)",
+ ParamValidators.gt(0))
setDefault(vectorSize -> 100)
/** @group getParam */
@@ -55,7 +56,8 @@ private[feature] trait Word2VecBase extends Params
* @group expertParam
*/
final val windowSize = new IntParam(
- this, "windowSize", "the window size (context words from [-window, window])")
+ this, "windowSize", "the window size (context words from [-window, window]) (> 0)",
+ ParamValidators.gt(0))
setDefault(windowSize -> 5)
/** @group expertGetParam */
@@ -67,7 +69,8 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val numPartitions = new IntParam(
- this, "numPartitions", "number of partitions for sentences of words")
+ this, "numPartitions", "number of partitions for sentences of words (> 0)",
+ ParamValidators.gt(0))
setDefault(numPartitions -> 1)
/** @group getParam */
@@ -80,7 +83,7 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " +
- "appear to be included in the word2vec model's vocabulary")
+ "appear to be included in the word2vec model's vocabulary (>= 0)", ParamValidators.gtEq(0))
setDefault(minCount -> 5)
/** @group getParam */
@@ -95,7 +98,7 @@ private[feature] trait Word2VecBase extends Params
*/
final val maxSentenceLength = new IntParam(this, "maxSentenceLength", "Maximum length " +
"(in words) of each sentence in the input data. Any sentence longer than this threshold will " +
- "be divided into chunks up to the size.")
+ "be divided into chunks up to the size (> 0)", ParamValidators.gt(0))
setDefault(maxSentenceLength -> 1000)
/** @group getParam */
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index cd7b4f2..4d274f3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -61,7 +61,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
* @group param
*/
final val featureIndex: IntParam = new IntParam(this, "featureIndex",
- "The index of the feature if featuresCol is a vector column, no effect otherwise.")
+ "The index of the feature if featuresCol is a vector column, no effect otherwise (>= 0)",
+ ParamValidators.gtEq(0))
/** @group getParam */
final def getFeatureIndex: Int = $(featureIndex)
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 9639b07..71c542a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -171,7 +171,11 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
* @group setParam
*/
@Since("1.6.0")
- def setSolver(value: String): this.type = set(solver, value)
+ def setSolver(value: String): this.type = {
+ require(Set("auto", "l-bfgs", "normal").contains(value),
+ s"Solver $value was not supported. Supported options: auto, l-bfgs, normal")
+ set(solver, value)
+ }
setDefault(solver -> "auto")
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/c68f1a38/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 57c7e44..5a55153 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -73,11 +73,13 @@ private[ml] trait DecisionTreeParams extends PredictorParams
/**
* Minimum information gain for a split to be considered at a tree node.
+ * Should be >= 0.0.
* (default = 0.0)
* @group param
*/
final val minInfoGain: DoubleParam = new DoubleParam(this, "minInfoGain",
- "Minimum information gain for a split to be considered at a tree node.")
+ "Minimum information gain for a split to be considered at a tree node.",
+ ParamValidators.gtEq(0.0))
/**
* Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org