Posted to commits@spark.apache.org by me...@apache.org on 2015/10/21 01:51:37 UTC

spark git commit: [SPARK-10767][PYSPARK] Make pyspark shared params codegen more consistent

Repository: spark
Updated Branches:
  refs/heads/master da46b77af -> aea7142c9


[SPARK-10767][PYSPARK] Make pyspark shared params codegen more consistent

Namely "." shows up in some places in the template when using the param docstring and not in others

Author: Holden Karau <ho...@pigscanfly.ca>

Closes #9017 from holdenk/SPARK-10767-Make-pyspark-shared-params-codegen-more-consistent.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aea7142c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aea7142c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aea7142c

Branch: refs/heads/master
Commit: aea7142c9802d1e855443c01621ebc8d57be8c5e
Parents: da46b77
Author: Holden Karau <ho...@pigscanfly.ca>
Authored: Tue Oct 20 16:51:32 2015 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Oct 20 16:51:32 2015 -0700

----------------------------------------------------------------------
 .../pyspark/ml/param/_shared_params_code_gen.py | 28 +++---
 python/pyspark/ml/param/shared.py               | 94 ++++++++++----------
 python/pyspark/ml/tests.py                      |  8 +-
 3 files changed, 65 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/aea7142c/python/pyspark/ml/param/_shared_params_code_gen.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 45a94e9..7143d56 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -47,7 +47,7 @@ def _gen_param_header(name, doc, defaultValueStr):
     """
     template = '''class Has$Name(Params):
     """
-    Mixin for param $name: $doc.
+    Mixin for param $name: $doc
     """
 
     # a placeholder to make it appear in the generated doc
@@ -105,22 +105,22 @@ if __name__ == "__main__":
     print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n")
     print("from pyspark.ml.param import Param, Params\n\n")
     shared = [
-        ("maxIter", "max number of iterations (>= 0)", None),
-        ("regParam", "regularization parameter (>= 0)", None),
-        ("featuresCol", "features column name", "'features'"),
-        ("labelCol", "label column name", "'label'"),
-        ("predictionCol", "prediction column name", "'prediction'"),
+        ("maxIter", "max number of iterations (>= 0).", None),
+        ("regParam", "regularization parameter (>= 0).", None),
+        ("featuresCol", "features column name.", "'features'"),
+        ("labelCol", "label column name.", "'label'"),
+        ("predictionCol", "prediction column name.", "'prediction'"),
         ("probabilityCol", "Column name for predicted class conditional probabilities. " +
          "Note: Not all models output well-calibrated probability estimates! These probabilities " +
          "should be treated as confidences, not precise probabilities.", "'probability'"),
-        ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"),
-        ("inputCol", "input column name", None),
-        ("inputCols", "input column names", None),
-        ("outputCol", "output column name", "self.uid + '__output'"),
-        ("numFeatures", "number of features", None),
-        ("checkpointInterval", "checkpoint interval (>= 1)", None),
-        ("seed", "random seed", "hash(type(self).__name__)"),
-        ("tol", "the convergence tolerance for iterative algorithms", None),
+        ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name.", "'rawPrediction'"),
+        ("inputCol", "input column name.", None),
+        ("inputCols", "input column names.", None),
+        ("outputCol", "output column name.", "self.uid + '__output'"),
+        ("numFeatures", "number of features.", None),
+        ("checkpointInterval", "checkpoint interval (>= 1).", None),
+        ("seed", "random seed.", "hash(type(self).__name__)"),
+        ("tol", "the convergence tolerance for iterative algorithms.", None),
         ("stepSize", "Step size to be used for each iteration of optimization.", None),
         ("handleInvalid", "how to handle invalid entries. Options are skip (which will filter " +
          "out rows with bad values), or error (which will throw an errror). More options may be " +

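Each entry in the shared list above is a (name, doc, defaultValueStr) tuple that the generator expands through the header template. A hedged sketch of that expansion (the loop and print layout here are illustrative, not the actual codegen):

    shared = [
        ("maxIter", "max number of iterations (>= 0).", None),
        ("inputCol", "input column name.", None),
    ]
    for name, doc, defaultValueStr in shared:
        Name = name[0].upper() + name[1:]  # maxIter -> MaxIter
        print("class Has%s(Params):" % Name)
        print('    """')
        # The doc string now carries its own trailing "." consistently,
        # instead of the template appending one.
        print("    Mixin for param %s: %s" % (name, doc))
        print('    """')
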
http://git-wip-us.apache.org/repos/asf/spark/blob/aea7142c/python/pyspark/ml/param/shared.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index 8c438bc..3a58ac8 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -26,12 +26,12 @@ class HasMaxIter(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0)")
+    maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0).")
 
     def __init__(self):
         super(HasMaxIter, self).__init__()
-        #: param for max number of iterations (>= 0)
-        self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)")
+        #: param for max number of iterations (>= 0).
+        self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0).")
 
     def setMaxIter(self, value):
         """
@@ -53,12 +53,12 @@ class HasRegParam(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0)")
+    regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0).")
 
     def __init__(self):
         super(HasRegParam, self).__init__()
-        #: param for regularization parameter (>= 0)
-        self.regParam = Param(self, "regParam", "regularization parameter (>= 0)")
+        #: param for regularization parameter (>= 0).
+        self.regParam = Param(self, "regParam", "regularization parameter (>= 0).")
 
     def setRegParam(self, value):
         """
@@ -80,12 +80,12 @@ class HasFeaturesCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    featuresCol = Param(Params._dummy(), "featuresCol", "features column name")
+    featuresCol = Param(Params._dummy(), "featuresCol", "features column name.")
 
     def __init__(self):
         super(HasFeaturesCol, self).__init__()
-        #: param for features column name
-        self.featuresCol = Param(self, "featuresCol", "features column name")
+        #: param for features column name.
+        self.featuresCol = Param(self, "featuresCol", "features column name.")
         self._setDefault(featuresCol='features')
 
     def setFeaturesCol(self, value):
@@ -108,12 +108,12 @@ class HasLabelCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    labelCol = Param(Params._dummy(), "labelCol", "label column name")
+    labelCol = Param(Params._dummy(), "labelCol", "label column name.")
 
     def __init__(self):
         super(HasLabelCol, self).__init__()
-        #: param for label column name
-        self.labelCol = Param(self, "labelCol", "label column name")
+        #: param for label column name.
+        self.labelCol = Param(self, "labelCol", "label column name.")
         self._setDefault(labelCol='label')
 
     def setLabelCol(self, value):
@@ -136,12 +136,12 @@ class HasPredictionCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    predictionCol = Param(Params._dummy(), "predictionCol", "prediction column name")
+    predictionCol = Param(Params._dummy(), "predictionCol", "prediction column name.")
 
     def __init__(self):
         super(HasPredictionCol, self).__init__()
-        #: param for prediction column name
-        self.predictionCol = Param(self, "predictionCol", "prediction column name")
+        #: param for prediction column name.
+        self.predictionCol = Param(self, "predictionCol", "prediction column name.")
         self._setDefault(predictionCol='prediction')
 
     def setPredictionCol(self, value):
@@ -160,7 +160,7 @@ class HasPredictionCol(Params):
 
 class HasProbabilityCol(Params):
     """
-    Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities..
+    Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -192,12 +192,12 @@ class HasRawPredictionCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name")
+    rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name.")
 
     def __init__(self):
         super(HasRawPredictionCol, self).__init__()
-        #: param for raw prediction (a.k.a. confidence) column name
-        self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name")
+        #: param for raw prediction (a.k.a. confidence) column name.
+        self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name.")
         self._setDefault(rawPredictionCol='rawPrediction')
 
     def setRawPredictionCol(self, value):
@@ -220,12 +220,12 @@ class HasInputCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    inputCol = Param(Params._dummy(), "inputCol", "input column name")
+    inputCol = Param(Params._dummy(), "inputCol", "input column name.")
 
     def __init__(self):
         super(HasInputCol, self).__init__()
-        #: param for input column name
-        self.inputCol = Param(self, "inputCol", "input column name")
+        #: param for input column name.
+        self.inputCol = Param(self, "inputCol", "input column name.")
 
     def setInputCol(self, value):
         """
@@ -247,12 +247,12 @@ class HasInputCols(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    inputCols = Param(Params._dummy(), "inputCols", "input column names")
+    inputCols = Param(Params._dummy(), "inputCols", "input column names.")
 
     def __init__(self):
         super(HasInputCols, self).__init__()
-        #: param for input column names
-        self.inputCols = Param(self, "inputCols", "input column names")
+        #: param for input column names.
+        self.inputCols = Param(self, "inputCols", "input column names.")
 
     def setInputCols(self, value):
         """
@@ -274,12 +274,12 @@ class HasOutputCol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    outputCol = Param(Params._dummy(), "outputCol", "output column name")
+    outputCol = Param(Params._dummy(), "outputCol", "output column name.")
 
     def __init__(self):
         super(HasOutputCol, self).__init__()
-        #: param for output column name
-        self.outputCol = Param(self, "outputCol", "output column name")
+        #: param for output column name.
+        self.outputCol = Param(self, "outputCol", "output column name.")
         self._setDefault(outputCol=self.uid + '__output')
 
     def setOutputCol(self, value):
@@ -302,12 +302,12 @@ class HasNumFeatures(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    numFeatures = Param(Params._dummy(), "numFeatures", "number of features")
+    numFeatures = Param(Params._dummy(), "numFeatures", "number of features.")
 
     def __init__(self):
         super(HasNumFeatures, self).__init__()
-        #: param for number of features
-        self.numFeatures = Param(self, "numFeatures", "number of features")
+        #: param for number of features.
+        self.numFeatures = Param(self, "numFeatures", "number of features.")
 
     def setNumFeatures(self, value):
         """
@@ -329,12 +329,12 @@ class HasCheckpointInterval(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    checkpointInterval = Param(Params._dummy(), "checkpointInterval", "checkpoint interval (>= 1)")
+    checkpointInterval = Param(Params._dummy(), "checkpointInterval", "checkpoint interval (>= 1).")
 
     def __init__(self):
         super(HasCheckpointInterval, self).__init__()
-        #: param for checkpoint interval (>= 1)
-        self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1)")
+        #: param for checkpoint interval (>= 1).
+        self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1).")
 
     def setCheckpointInterval(self, value):
         """
@@ -356,12 +356,12 @@ class HasSeed(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    seed = Param(Params._dummy(), "seed", "random seed")
+    seed = Param(Params._dummy(), "seed", "random seed.")
 
     def __init__(self):
         super(HasSeed, self).__init__()
-        #: param for random seed
-        self.seed = Param(self, "seed", "random seed")
+        #: param for random seed.
+        self.seed = Param(self, "seed", "random seed.")
         self._setDefault(seed=hash(type(self).__name__))
 
     def setSeed(self, value):
@@ -384,12 +384,12 @@ class HasTol(Params):
     """
 
     # a placeholder to make it appear in the generated doc
-    tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms")
+    tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms.")
 
     def __init__(self):
         super(HasTol, self).__init__()
-        #: param for the convergence tolerance for iterative algorithms
-        self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms")
+        #: param for the convergence tolerance for iterative algorithms.
+        self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms.")
 
     def setTol(self, value):
         """
@@ -407,7 +407,7 @@ class HasTol(Params):
 
 class HasStepSize(Params):
     """
-    Mixin for param stepSize: Step size to be used for each iteration of optimization..
+    Mixin for param stepSize: Step size to be used for each iteration of optimization.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -434,7 +434,7 @@ class HasStepSize(Params):
 
 class HasHandleInvalid(Params):
     """
-    Mixin for param handleInvalid: how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later..
+    Mixin for param handleInvalid: how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -461,7 +461,7 @@ class HasHandleInvalid(Params):
 
 class HasElasticNetParam(Params):
     """
-    Mixin for param elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty..
+    Mixin for param elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -489,7 +489,7 @@ class HasElasticNetParam(Params):
 
 class HasFitIntercept(Params):
     """
-    Mixin for param fitIntercept: whether to fit an intercept term..
+    Mixin for param fitIntercept: whether to fit an intercept term.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -517,7 +517,7 @@ class HasFitIntercept(Params):
 
 class HasStandardization(Params):
     """
-    Mixin for param standardization: whether to standardize the training features before fitting the model..
+    Mixin for param standardization: whether to standardize the training features before fitting the model.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -545,7 +545,7 @@ class HasStandardization(Params):
 
 class HasThresholds(Params):
     """
-    Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold..
+    Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values >= 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class' threshold.
     """
 
     # a placeholder to make it appear in the generated doc
@@ -572,7 +572,7 @@ class HasThresholds(Params):
 
 class HasWeightCol(Params):
     """
-    Mixin for param weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0..
+    Mixin for param weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0.
     """
 
     # a placeholder to make it appear in the generated doc

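The regenerated mixins are consumed by mixing them into Params subclasses. A small usage sketch (the TestParams name mirrors the class exercised in tests.py below; the assumption that no SparkContext is needed just to inspect param docs is worth checking):

    from pyspark.ml.param.shared import HasMaxIter, HasInputCol, HasSeed

    class TestParams(HasMaxIter, HasInputCol, HasSeed):
        pass

    tp = TestParams()
    # Param docs now end with a period, matching the updated test expectations.
    print(tp.maxIter.doc)  # max number of iterations (>= 0).
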
http://git-wip-us.apache.org/repos/asf/spark/blob/aea7142c/python/pyspark/ml/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 648fa88..6a2577d 100644
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -163,7 +163,7 @@ class ParamTests(PySparkTestCase):
         testParams = TestParams()
         maxIter = testParams.maxIter
         self.assertEqual(maxIter.name, "maxIter")
-        self.assertEqual(maxIter.doc, "max number of iterations (>= 0)")
+        self.assertEqual(maxIter.doc, "max number of iterations (>= 0).")
         self.assertTrue(maxIter.parent == testParams.uid)
 
     def test_params(self):
@@ -197,9 +197,9 @@ class ParamTests(PySparkTestCase):
 
         self.assertEqual(
             testParams.explainParams(),
-            "\n".join(["inputCol: input column name (undefined)",
-                       "maxIter: max number of iterations (>= 0) (default: 10, current: 100)",
-                       "seed: random seed (default: 41, current: 43)"]))
+            "\n".join(["inputCol: input column name. (undefined)",
+                       "maxIter: max number of iterations (>= 0). (default: 10, current: 100)",
+                       "seed: random seed. (default: 41, current: 43)"]))
 
     def test_hasseed(self):
         noSeedSpecd = TestParams()

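For reference, explainParams() renders each param as "name: doc (status)", which is why each doc string's trailing "." now appears before the parenthesized default/current values in the updated assertions. A hypothetical helper showing that composition (explain() is illustrative, not pyspark API):

    def explain(name, doc, default=None, current=None):
        # Mirrors the expected strings asserted in the test above.
        if default is None and current is None:
            status = "(undefined)"
        elif current is None:
            status = "(default: %s)" % default
        else:
            status = "(default: %s, current: %s)" % (default, current)
        return "%s: %s %s" % (name, doc, status)

    print(explain("maxIter", "max number of iterations (>= 0).", 10, 100))
    # maxIter: max number of iterations (>= 0). (default: 10, current: 100)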
