You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2018/07/31 18:37:16 UTC
spark git commit: [SPARK-24609][ML][DOC] PySpark/SparkR doc doesn't
explain RandomForestClassifier.featureSubsetStrategy well
Repository: spark
Updated Branches:
refs/heads/master 4ac2126bc -> 1223a201f
[SPARK-24609][ML][DOC] PySpark/SparkR doc doesn't explain RandomForestClassifier.featureSubsetStrategy well
## What changes were proposed in this pull request?
Update the documentation of RandomForestClassifier.featureSubsetStrategy.
## How was this patch tested?
Locally built the docs and verified the rendered output.
rdoc:
![default](https://user-images.githubusercontent.com/7322292/42807787-4dda6362-89e4-11e8-839f-a8519b7c1f1c.png)
pydoc:
![default](https://user-images.githubusercontent.com/7322292/43112817-5f1d4d88-8f2a-11e8-93ff-de90db8afdca.png)
Author: zhengruifeng <ru...@foxmail.com>
Closes #21788 from zhengruifeng/rf_doc_py_r.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1223a201
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1223a201
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1223a201
Branch: refs/heads/master
Commit: 1223a201fcb2c2f211ad96997ebb00c3554aa822
Parents: 4ac2126
Author: zhengruifeng <ru...@foxmail.com>
Authored: Tue Jul 31 13:37:13 2018 -0500
Committer: Sean Owen <sr...@gmail.com>
Committed: Tue Jul 31 13:37:13 2018 -0500
----------------------------------------------------------------------
R/pkg/R/mllib_tree.R | 13 ++++++++++++-
python/pyspark/ml/regression.py | 9 +++++++--
2 files changed, 19 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/1223a201/R/pkg/R/mllib_tree.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R
index 6769be0..0e60842 100644
--- a/R/pkg/R/mllib_tree.R
+++ b/R/pkg/R/mllib_tree.R
@@ -362,7 +362,18 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara
#' For regression, must be "variance". For classification, must be one of
#' "entropy" and "gini", default is "gini".
#' @param featureSubsetStrategy The number of features to consider for splits at each tree node.
-#' Supported options: "auto", "all", "onethird", "sqrt", "log2", (0.0-1.0], [1-n].
+#' Supported options: "auto" (choose automatically for task: If
+#' numTrees == 1, set to "all." If numTrees > 1
+#' (forest), set to "sqrt" for classification and
+#' to "onethird" for regression),
+#' "all" (use all features),
+#' "onethird" (use 1/3 of the features),
+#' "sqrt" (use sqrt(number of features)),
+#' "log2" (use log2(number of features)),
+#' "n": (when n is in the range (0, 1.0], use
+#' n * number of features. When n is in the range
+#' (1, number of features), use n features).
+#' Default is "auto".
#' @param seed integer seed for random number generation.
#' @param subsamplingRate Fraction of the training data used for learning each decision tree, in
#' range (0, 1].
http://git-wip-us.apache.org/repos/asf/spark/blob/1223a201/python/pyspark/ml/regression.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 83f0edb..564c9f1 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -608,8 +608,13 @@ class TreeEnsembleParams(DecisionTreeParams):
featureSubsetStrategy = \
Param(Params._dummy(), "featureSubsetStrategy",
"The number of features to consider for splits at each tree node. Supported " +
- "options: " + ", ".join(supportedFeatureSubsetStrategies) + ", (0.0-1.0], [1-n].",
- typeConverter=TypeConverters.toString)
+ "options: 'auto' (choose automatically for task: If numTrees == 1, set to " +
+ "'all'. If numTrees > 1 (forest), set to 'sqrt' for classification and to " +
+ "'onethird' for regression), 'all' (use all features), 'onethird' (use " +
+ "1/3 of the features), 'sqrt' (use sqrt(number of features)), 'log2' (use " +
+ "log2(number of features)), 'n' (when n is in the range (0, 1.0], use " +
+ "n * number of features. When n is in the range (1, number of features), use" +
+ " n features). default = 'auto'", typeConverter=TypeConverters.toString)
def __init__(self):
super(TreeEnsembleParams, self).__init__()
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org