You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2015/09/11 17:55:40 UTC
spark git commit: [MINOR] [MLLIB] [ML] [DOC] Minor doc fixes for
StringIndexer and MetadataUtils
Repository: spark
Updated Branches:
refs/heads/master 960d2d0ac -> 2e3a28075
[MINOR] [MLLIB] [ML] [DOC] Minor doc fixes for StringIndexer and MetadataUtils
Changes:
* Make Scala doc for IndexToString (the inverse of StringIndexer) clearer. Also remove Scala doc from transformSchema, so that the doc is inherited.
* MetadataUtils.scala: update the doc “Helper utilities for tree-based algorithms”, since these utilities are not just for trees anymore
CC: holdenk mengxr
Author: Joseph K. Bradley <jo...@databricks.com>
Closes #8679 from jkbradley/doc-fixes-1.5.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e3a2807
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e3a2807
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e3a2807
Branch: refs/heads/master
Commit: 2e3a280754a28dc36a71b9ff988e34cbf457f6c3
Parents: 960d2d0
Author: Joseph K. Bradley <jo...@databricks.com>
Authored: Fri Sep 11 08:55:35 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Fri Sep 11 08:55:35 2015 -0700
----------------------------------------------------------------------
.../apache/spark/ml/feature/StringIndexer.scala | 31 +++++++-------------
.../apache/spark/ml/util/MetadataUtils.scala | 2 +-
python/pyspark/ml/feature.py | 16 +++++-----
3 files changed, 20 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2e3a2807/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index b6482ff..3a4ab9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -181,10 +181,10 @@ class StringIndexerModel (
/**
* :: Experimental ::
- * A [[Transformer]] that maps a column of string indices back to a new column of corresponding
- * string values using either the ML attributes of the input column, or if provided using the labels
- * supplied by the user.
- * All original columns are kept during transformation.
+ * A [[Transformer]] that maps a column of indices back to a new column of corresponding
+ * string values.
+ * The index-string mapping is either from the ML attributes of the input column,
+ * or from user-supplied labels (which take precedence over ML attributes).
*
* @see [[StringIndexer]] for converting strings into indices
*/
@@ -202,32 +202,23 @@ class IndexToString private[ml] (
/** @group setParam */
def setOutputCol(value: String): this.type = set(outputCol, value)
- /**
- * Optional labels to be provided by the user, if not supplied column
- * metadata is read for labels. The default value is an empty array,
- * but the empty array is ignored and column metadata used instead.
- * @group setParam
- */
+ /** @group setParam */
def setLabels(value: Array[String]): this.type = set(labels, value)
/**
- * Param for array of labels.
- * Optional labels to be provided by the user.
- * Default: Empty array, in which case column metadata is used for labels.
+ * Optional param for array of labels specifying index-string mapping.
+ *
+ * Default: Empty array, in which case [[inputCol]] metadata is used for labels.
* @group param
*/
final val labels: StringArrayParam = new StringArrayParam(this, "labels",
- "array of labels, if not provided metadata from inputCol is used instead.")
+ "Optional array of labels specifying index-string mapping." +
+ " If not provided or if empty, then metadata from inputCol is used instead.")
setDefault(labels, Array.empty[String])
- /**
- * Optional labels to be provided by the user, if not supplied column
- * metadata is read for labels.
- * @group getParam
- */
+ /** @group getParam */
final def getLabels: Array[String] = $(labels)
- /** Transform the schema for the inverse transformation */
override def transformSchema(schema: StructType): StructType = {
val inputColName = $(inputCol)
val inputDataType = schema(inputColName).dataType
http://git-wip-us.apache.org/repos/asf/spark/blob/2e3a2807/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
index fcb517b..96a38a3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructField
/**
- * Helper utilities for tree-based algorithms
+ * Helper utilities for algorithms using ML metadata
*/
private[spark] object MetadataUtils {
http://git-wip-us.apache.org/repos/asf/spark/blob/2e3a2807/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 71dc636..97cbee7 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -985,17 +985,17 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
"""
.. note:: Experimental
- A :py:class:`Transformer` that maps a column of string indices back to a new column of
- corresponding string values using either the ML attributes of the input column, or if
- provided using the labels supplied by the user.
- All original columns are kept during transformation.
+ A :py:class:`Transformer` that maps a column of indices back to a new column of
+ corresponding string values.
+ The index-string mapping is either from the ML attributes of the input column,
+ or from user-supplied labels (which take precedence over ML attributes).
See L{StringIndexer} for converting strings into indices.
"""
# a placeholder to make the labels show up in generated doc
labels = Param(Params._dummy(), "labels",
- "Optional array of labels to be provided by the user, if not supplied or " +
- "empty, column metadata is read for labels")
+ "Optional array of labels specifying index-string mapping." +
+ " If not provided or if empty, then metadata from inputCol is used instead.")
@keyword_only
def __init__(self, inputCol=None, outputCol=None, labels=None):
@@ -1006,8 +1006,8 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol):
self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
self.uid)
self.labels = Param(self, "labels",
- "Optional array of labels to be provided by the user, if not " +
- "supplied or empty, column metadata is read for labels")
+ "Optional array of labels specifying index-string mapping. If not" +
+ " provided or if empty, then metadata from inputCol is used instead.")
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org