You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by fe...@apache.org on 2018/01/25 09:48:19 UTC

spark git commit: [SPARK-23163][DOC][PYTHON] Sync ML Python API with Scala

Repository: spark
Updated Branches:
  refs/heads/master e29b08add -> 39ee2acf9


[SPARK-23163][DOC][PYTHON] Sync ML Python API with Scala

## What changes were proposed in this pull request?

This syncs the ML Python API documentation with its Scala counterpart, covering the differences found after the 2.3 QA audit.

## How was this patch tested?

N/A (documentation-only change).

Author: Bryan Cutler <cu...@gmail.com>

Closes #20354 from BryanCutler/pyspark-ml-doc-sync-23163.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39ee2acf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39ee2acf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39ee2acf

Branch: refs/heads/master
Commit: 39ee2acf96f1e1496cff8e4d2614d27fca76d43b
Parents: e29b08a
Author: Bryan Cutler <cu...@gmail.com>
Authored: Thu Jan 25 01:48:11 2018 -0800
Committer: Felix Cheung <fe...@apache.org>
Committed: Thu Jan 25 01:48:11 2018 -0800

----------------------------------------------------------------------
 python/pyspark/ml/evaluation.py | 8 +++++++-
 python/pyspark/ml/feature.py    | 2 +-
 python/pyspark/ml/fpm.py        | 2 +-
 3 files changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/39ee2acf/python/pyspark/ml/evaluation.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index aa8dbe7..0cbce9b 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -334,7 +334,13 @@ class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol,
     .. note:: Experimental
 
     Evaluator for Clustering results, which expects two input
-    columns: prediction and features.
+    columns: prediction and features. The metric computes the Silhouette
+    measure using the squared Euclidean distance.
+
+    The Silhouette is a measure for the validation of the consistency
+    within clusters. It ranges between 1 and -1, where a value close to
+    1 means that the points in a cluster are close to the other points
+    in the same cluster and far from the points of the other clusters.
 
     >>> from pyspark.ml.linalg import Vectors
     >>> featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),

http://git-wip-us.apache.org/repos/asf/spark/blob/39ee2acf/python/pyspark/ml/feature.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index eb79b19..da85ba7 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -3440,7 +3440,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja
 
     selectorType = Param(Params._dummy(), "selectorType",
                          "The selector type of the ChisqSelector. " +
-                         "Supported options: numTopFeatures (default), percentile and fpr.",
+                         "Supported options: numTopFeatures (default), percentile, fpr, fdr, fwe.",
                          typeConverter=TypeConverters.toString)
 
     numTopFeatures = \

http://git-wip-us.apache.org/repos/asf/spark/blob/39ee2acf/python/pyspark/ml/fpm.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
index dd7dda5..b8dafd4 100644
--- a/python/pyspark/ml/fpm.py
+++ b/python/pyspark/ml/fpm.py
@@ -144,7 +144,7 @@ class FPGrowthModel(JavaModel, JavaMLWritable, JavaMLReadable):
     @since("2.2.0")
     def associationRules(self):
         """
-        Data with three columns:
+        DataFrame with three columns:
         * `antecedent`  - Array of the same type as the input column.
         * `consequent`  - Array of the same type as the input column.
         * `confidence`  - Confidence for the rule (`DoubleType`).


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org