Posted to commits@spark.apache.org by sr...@apache.org on 2016/11/02 09:23:39 UTC

spark git commit: [SPARK-18133][BRANCH-2.0][EXAMPLES][ML] Python ML Pipeline Exampl…

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 81f080425 -> 09178b6ee


[SPARK-18133][BRANCH-2.0][EXAMPLES][ML] Python ML Pipeline Example has syntax errors]

## What changes were proposed in this pull request?

[Fix] [branch-2.0] In Python 3 there is only one integer type, int, which largely behaves like Python 2's long. Because Python 3 does not accept the "L" suffix on integer literals, the suffix has been removed from all examples.
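
For context, a quick illustration (not from the patch itself) of the failure mode: the "L" suffix that Python 2 accepts is a hard syntax error in Python 3, while a plain int literal works in both and is arbitrary-precision in Python 3.

```python
# Python 2 only -- under Python 3 this literal is a SyntaxError:
#   >>> type(4L)
#   <type 'long'>

# Python 3 -- the plain int literal is the single, arbitrary-precision
# integer type, so no suffix is needed:
n = 4
print(type(n))    # <class 'int'>
print(2 ** 100)   # 1267650600228229401496703205376 -- no "L", no overflow
```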

## How was this patch tested?

Unit tests.


Author: Jagadeesan <as...@us.ibm.com>

Closes #15729 from jagadeesanas2/SPARK-18133_branch2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/09178b6e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/09178b6e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/09178b6e

Branch: refs/heads/branch-2.0
Commit: 09178b6eefd33011c3e90164356a5d6c3ae737bd
Parents: 81f0804
Author: Jagadeesan <as...@us.ibm.com>
Authored: Wed Nov 2 09:23:30 2016 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Nov 2 09:23:30 2016 +0000

----------------------------------------------------------------------
 examples/src/main/python/ml/cross_validator.py      |  8 ++++----
 examples/src/main/python/ml/pipeline_example.py     | 16 ++++++++--------
 .../mllib/binary_classification_metrics_example.py  |  2 +-
 .../python/mllib/multi_class_metrics_example.py     |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/ml/cross_validator.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py
index a41df6c..283db5d 100644
--- a/examples/src/main/python/ml/cross_validator.py
+++ b/examples/src/main/python/ml/cross_validator.py
@@ -83,10 +83,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "mapreduce spark"),
-        (7L, "apache hadoop")
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "mapreduce spark"),
+        (7, "apache hadoop")
     ], ["id", "text"])
 
     # Make predictions on test documents. cvModel uses the best model found (lrModel).
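
A hedged aside on why dropping the suffix is safe here: PySpark infers a plain Python int as Spark's LongType when building a DataFrame, so the "id" column keeps the schema it had with the old long literals. A minimal sketch, assuming a local SparkSession (the builder settings are illustrative, not part of the example):

```python
from pyspark.sql import SparkSession

# Illustrative local session; the real example builds `spark` in its own setup.
spark = SparkSession.builder.master("local[1]").appName("schema-check").getOrCreate()

test = spark.createDataFrame([
    (4, "spark i j k"),
    (5, "l m n"),
], ["id", "text"])

test.printSchema()
# root
#  |-- id: long (nullable = true)
#  |-- text: string (nullable = true)

spark.stop()
```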

http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/ml/pipeline_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py
index bd10cfd..1926cd2 100644
--- a/examples/src/main/python/ml/pipeline_example.py
+++ b/examples/src/main/python/ml/pipeline_example.py
@@ -35,10 +35,10 @@ if __name__ == "__main__":
     # $example on$
     # Prepare training documents from a list of (id, text, label) tuples.
     training = spark.createDataFrame([
-        (0L, "a b c d e spark", 1.0),
-        (1L, "b d", 0.0),
-        (2L, "spark f g h", 1.0),
-        (3L, "hadoop mapreduce", 0.0)], ["id", "text", "label"])
+        (0, "a b c d e spark", 1.0),
+        (1, "b d", 0.0),
+        (2, "spark f g h", 1.0),
+        (3, "hadoop mapreduce", 0.0)], ["id", "text", "label"])
 
     # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
     tokenizer = Tokenizer(inputCol="text", outputCol="words")
@@ -51,10 +51,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled (id, text) tuples.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "mapreduce spark"),
-        (7L, "apache hadoop")], ["id", "text"])
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "mapreduce spark"),
+        (7, "apache hadoop")], ["id", "text"])
 
     # Make predictions on test documents and print columns of interest.
     prediction = model.transform(test)
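
The hunk ends just after model.transform(test). For orientation, a hedged sketch of the "print columns of interest" step that follows in the file; "probability" and "prediction" are the default output columns of PySpark's LogisticRegression, not names introduced by this patch:

```python
# Sketch: select and print the prediction results for each test document.
selected = prediction.select("id", "text", "probability", "prediction")
for row in selected.collect():
    rid, text, prob, pred = row
    print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), pred))
```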

http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/mllib/binary_classification_metrics_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py
index daf000e..91f8378 100644
--- a/examples/src/main/python/mllib/binary_classification_metrics_example.py
+++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -39,7 +39,7 @@ if __name__ == "__main__":
         .rdd.map(lambda row: LabeledPoint(row[0], row[1]))
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model
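
The seed change follows the same rule: RDD.randomSplit only needs an integer seed, and a plain int satisfies that under both Python 2 and Python 3. A minimal sketch, assuming a local SparkContext (names are illustrative):

```python
from pyspark import SparkContext

# Illustrative local context; the real example builds `sc` in its own setup.
sc = SparkContext("local[1]", "random-split-demo")

data = sc.parallelize(range(100))
training, test = data.randomSplit([0.6, 0.4], seed=11)  # plain int seed
print(training.count(), test.count())  # roughly a 60/40 split

sc.stop()
```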

http://git-wip-us.apache.org/repos/asf/spark/blob/09178b6e/examples/src/main/python/mllib/multi_class_metrics_example.py
----------------------------------------------------------------------
diff --git a/examples/src/main/python/mllib/multi_class_metrics_example.py b/examples/src/main/python/mllib/multi_class_metrics_example.py
index cd56b3c..7dc5fb4 100644
--- a/examples/src/main/python/mllib/multi_class_metrics_example.py
+++ b/examples/src/main/python/mllib/multi_class_metrics_example.py
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model

