Posted to commits@spark.apache.org by pw...@apache.org on 2014/03/06 21:45:34 UTC

git commit: SPARK-1187, Added missing Python APIs

Repository: spark
Updated Branches:
  refs/heads/master 3eb009f36 -> 3d3acef04


SPARK-1187, Added missing Python APIs

The following Python APIs are added (a brief usage sketch follows the list):
RDD.id()
SparkContext.setJobGroup()
SparkContext.setLocalProperty()
SparkContext.getLocalProperty()
SparkContext.sparkUser()
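
As a minimal usage sketch (hypothetical app and pool names; assumes PySpark is importable):

    from pyspark import SparkContext

    sc = SparkContext("local", "api-demo")
    sc.setJobGroup("nightly-etl", "Nightly ETL jobs")          # label jobs from this thread
    sc.setLocalProperty("spark.scheduler.pool", "production")  # thread-local scheduler pool
    print(sc.getLocalProperty("spark.scheduler.pool"))         # 'production'
    print(sc.sparkUser())                                      # e.g. the OS user name
    print(sc.parallelize([1, 2, 3]).id())                      # unique RDD id, e.g. 0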

This was raised earlier as part of apache/incubator-spark#486.

Author: Prabin Banka <pr...@imaginea.com>

Closes #75 from prabinb/python-api-backup and squashes the following commits:

cc3c6cd [Prabin Banka] Added missing Python APIs


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d3acef0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d3acef0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d3acef0

Branch: refs/heads/master
Commit: 3d3acef0474b6dc21f1b470ea96079a491e58b75
Parents: 3eb009f
Author: Prabin Banka <pr...@imaginea.com>
Authored: Thu Mar 6 12:45:27 2014 -0800
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Thu Mar 6 12:45:27 2014 -0800

----------------------------------------------------------------------
 python/pyspark/context.py | 31 +++++++++++++++++++++++++++++++
 python/pyspark/rdd.py     |  7 +++++++
 2 files changed, 38 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3d3acef0/python/pyspark/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 93faa2e..c9f42d3 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -372,6 +372,37 @@ class SparkContext(object):
         return newStorageLevel(storageLevel.useDisk, storageLevel.useMemory,
             storageLevel.deserialized, storageLevel.replication)
 
+    def setJobGroup(self, groupId, description):
+        """
+        Assigns a group ID to all the jobs started by this thread until the group ID is set to a
+        different value or cleared.
+
+        Often, a unit of execution in an application consists of multiple Spark actions or jobs.
+        Application programmers can use this method to group all those jobs together and give a
+        group description. Once set, the Spark web UI will associate such jobs with this group.
+        """
+        self._jsc.setJobGroup(groupId, description)
+
+    def setLocalProperty(self, key, value):
+        """
+        Set a local property that affects jobs submitted from this thread, such as the
+        Spark fair scheduler pool.
+        """
+        self._jsc.setLocalProperty(key, value)
+
+    def getLocalProperty(self, key):
+        """
+        Get a local property set in this thread, or None if it is missing. See
+        L{setLocalProperty}
+        """
+        return self._jsc.getLocalProperty(key)
+
+    def sparkUser(self):
+        """
+        Get SPARK_USER for the user who is running this SparkContext.
+        """
+        return self._jsc.sc().sparkUser()
+
 def _test():
     import atexit
     import doctest

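Properties set via setLocalProperty are per-thread. A sketch of that behavior (assumes an existing SparkContext sc; with Py4J each Python thread is typically served by its own JVM thread, so the exact result can vary by deployment):

    import threading

    sc.setLocalProperty("spark.scheduler.pool", "pool-a")

    def check_pool():
        # A different thread does not see the property set above,
        # so this typically prints None.
        print(sc.getLocalProperty("spark.scheduler.pool"))

    t = threading.Thread(target=check_pool)
    t.start()
    t.join()
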
http://git-wip-us.apache.org/repos/asf/spark/blob/3d3acef0/python/pyspark/rdd.py
----------------------------------------------------------------------
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index be23f87..097a0a2 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -95,6 +95,13 @@ class RDD(object):
         self.is_checkpointed = False
         self.ctx = ctx
         self._jrdd_deserializer = jrdd_deserializer
+        self._id = jrdd.id()
+
+    def id(self):
+        """
+        A unique ID for this RDD (within its SparkContext).
+        """
+        return self._id
 
     def __repr__(self):
         return self._jrdd.toString()
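
A short sketch of the new accessor (assumes an existing SparkContext sc; the printed ids are illustrative):

    rdd1 = sc.parallelize([1, 2, 3])
    rdd2 = sc.parallelize([4, 5, 6])
    # Ids are assigned on the JVM side and are unique within the owning SparkContext.
    print(rdd1.id(), rdd2.id())  # e.g. 0 1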