Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/02 06:29:59 UTC
[21/33] git commit: Fix some Python docs and make sure to unset SPARK_TESTING in Python tests so we don't get the test spark.conf on the classpath.

Fix some Python docs and make sure to unset SPARK_TESTING in Python
tests so we don't get the test spark.conf on the classpath.
Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/eaa8a68f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/eaa8a68f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/eaa8a68f
Branch: refs/heads/master
Commit: eaa8a68ff08304f713f4f75d39c61c020e0e691d
Parents: 11540b7
Author: Matei Zaharia <ma...@databricks.com>
Authored: Sun Dec 29 20:15:07 2013 -0500
Committer: Matei Zaharia <ma...@databricks.com>
Committed: Sun Dec 29 20:15:07 2013 -0500
----------------------------------------------------------------------
python/epydoc.conf | 2 +-
python/pyspark/__init__.py | 31 +++++++++++++++++--------------
python/pyspark/broadcast.py | 11 +++++++++++
python/pyspark/conf.py | 10 +++++-----
python/pyspark/context.py | 3 ++-
python/run-tests | 2 +-
6 files changed, 37 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/epydoc.conf
----------------------------------------------------------------------
diff --git a/python/epydoc.conf b/python/epydoc.conf
index 0b42e72..95a6af0 100644
--- a/python/epydoc.conf
+++ b/python/epydoc.conf
@@ -34,4 +34,4 @@ private: no
exclude: pyspark.cloudpickle pyspark.worker pyspark.join
pyspark.java_gateway pyspark.examples pyspark.shell pyspark.test
- pyspark.rddsampler pyspark.daemon
+ pyspark.rddsampler pyspark.daemon pyspark.mllib._common
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/pyspark/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index f1b95ac..2b2c3a0 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -20,21 +20,24 @@ PySpark is the Python API for Spark.
Public classes:
- - L{SparkContext<pyspark.context.SparkContext>}
- Main entry point for Spark functionality.
- - L{RDD<pyspark.rdd.RDD>}
- A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
- - L{Broadcast<pyspark.broadcast.Broadcast>}
- A broadcast variable that gets reused across tasks.
- - L{Accumulator<pyspark.accumulators.Accumulator>}
- An "add-only" shared variable that tasks can only add values to.
- - L{SparkConf<pyspark.conf.SparkConf}
- Configuration for a Spark application.
- - L{SparkFiles<pyspark.files.SparkFiles>}
- Access files shipped with jobs.
- - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
- Finer-grained cache persistence levels.
+ - L{SparkContext<pyspark.context.SparkContext>}
+ Main entry point for Spark functionality.
+ - L{RDD<pyspark.rdd.RDD>}
+ A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+ - L{Broadcast<pyspark.broadcast.Broadcast>}
+ A broadcast variable that gets reused across tasks.
+ - L{Accumulator<pyspark.accumulators.Accumulator>}
+ An "add-only" shared variable that tasks can only add values to.
+ - L{SparkConf<pyspark.conf.SparkConf>}
+ For configuring Spark.
+ - L{SparkFiles<pyspark.files.SparkFiles>}
+ Access files shipped with jobs.
+ - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
+ Finer-grained cache persistence levels.
"""
+
+
+
import sys
import os
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
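
As a usage sketch (added here for illustration, not part of this commit), the public classes listed in the docstring combine roughly like this in a driver program; the master URL and app name are placeholders:

    from pyspark import SparkConf, SparkContext

    conf = SparkConf().setMaster("local").setAppName("docs-demo")
    sc = SparkContext(conf=conf)

    rdd = sc.parallelize(range(10))     # RDD: the basic abstraction
    doubled = rdd.map(lambda x: x * 2)  # transformations are lazy
    print(doubled.sum())                # 90
    sc.stop()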
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/pyspark/broadcast.py
----------------------------------------------------------------------
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index dfdaba2..43f40f8 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -45,7 +45,18 @@ def _from_id(bid):
class Broadcast(object):
+ """
+ A broadcast variable created with
+ L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}.
+ Access its value through C{.value}.
+ """
+
def __init__(self, bid, value, java_broadcast=None, pickle_registry=None):
+ """
+ Should not be called directly by users -- use
+ L{SparkContext.broadcast()<pyspark.context.SparkContext.broadcast>}
+ instead.
+ """
self.value = value
self.bid = bid
self._jbroadcast = java_broadcast
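
A short usage sketch of the documented pattern (illustrative, not from the commit): the variable is created with SparkContext.broadcast() rather than constructed directly, and its value is read through .value:

    # Assuming an existing SparkContext `sc`.
    words = sc.broadcast(["a", "b", "c"])  # returns a pyspark.broadcast.Broadcast
    print(words.value)                     # ['a', 'b', 'c']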
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/pyspark/conf.py
----------------------------------------------------------------------
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index a79f348..cf98b0e 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -55,11 +55,11 @@ class SparkConf(object):
parameters as key-value pairs.
Most of the time, you would create a SparkConf object with
- C{SparkConf()}, which will load values from `spark.*` Java system
- properties and any `spark.conf` on your application's classpath.
- In this case, system properties take priority over `spark.conf`,
- and any parameters you set directly on the `SparkConf` object take
- priority over both of those.
+ C{SparkConf()}, which will load values from C{spark.*} Java system
+ properties and any C{spark.conf} on your Spark classpath. In this
+ case, system properties take priority over C{spark.conf}, and any
+ parameters you set directly on the C{SparkConf} object take priority
+ over both of those.
For unit tests, you can also call C{SparkConf(false)} to skip
loading external settings and get the same configuration no matter
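
A sketch of the precedence described above (illustrative; in the Python API the skip-loading flag is the loadDefaults constructor argument):

    from pyspark import SparkConf

    # Parameters set directly on the SparkConf take priority over
    # spark.* system properties and any spark.conf on the classpath.
    conf = SparkConf().set("spark.app.name", "precedence-demo")
    print(conf.get("spark.app.name"))   # precedence-demo

    # For unit tests: skip loading external settings entirely.
    test_conf = SparkConf(loadDefaults=False)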
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/pyspark/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 1244a14..8b02802 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -267,7 +267,8 @@ class SparkContext(object):
def broadcast(self, value):
"""
- Broadcast a read-only variable to the cluster, returning a C{Broadcast}
+ Broadcast a read-only variable to the cluster, returning a
+ L{Broadcast<pyspark.broadcast.Broadcast>}
object for reading it in distributed functions. The variable will be
sent to each cluster only once.
"""
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/eaa8a68f/python/run-tests
----------------------------------------------------------------------
diff --git a/python/run-tests b/python/run-tests
index a0898b3..4b71fff 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -29,7 +29,7 @@ FAILED=0
rm -f unit-tests.log
function run_test() {
- $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
+ SPARK_TESTING=0 $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
FAILED=$((PIPESTATUS[0]||$FAILED))
}
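
The VAR=value prefix added above sets SPARK_TESTING only for that one pyspark invocation. A Python sketch of the same per-child override (illustrative, not part of the commit):

    import os
    import subprocess

    # Copy the parent environment and override one variable for the child only.
    child_env = dict(os.environ, SPARK_TESTING="0")
    subprocess.call(
        ["python", "-c", "import os; print(os.environ['SPARK_TESTING'])"],
        env=child_env,
    )                                        # child prints: 0
    print(os.environ.get("SPARK_TESTING"))  # parent environment is unchanged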