You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2017/08/09 21:03:20 UTC

spark git commit: [SPARK-21551][PYTHON] Increase timeout for PythonRDD.serveIterator

Repository: spark
Updated Branches:
  refs/heads/master 0fb73253f -> c06f3f5ac


[SPARK-21551][PYTHON] Increase timeout for PythonRDD.serveIterator

## What changes were proposed in this pull request?

This modification increases the timeout for `serveIterator` (which is not dynamically configurable). This fixes timeout issues in pyspark when using `collect` and similar functions, in cases where Python may take more than a couple seconds to connect.

See https://issues.apache.org/jira/browse/SPARK-21551

## How was this patch tested?

Ran the tests.

cc rxin

Author: peay <pe...@protonmail.com>

Closes #18752 from peay/spark-21551.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c06f3f5a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c06f3f5a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c06f3f5a

Branch: refs/heads/master
Commit: c06f3f5ac500b02d38ca7ec5fcb33085e07f2f75
Parents: 0fb7325
Author: peay <pe...@protonmail.com>
Authored: Wed Aug 9 14:03:18 2017 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Wed Aug 9 14:03:18 2017 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/api/python/PythonRDD.scala | 6 +++---
 python/pyspark/rdd.py                                          | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c06f3f5a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 6a81752..3377101 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -683,7 +683,7 @@ private[spark] object PythonRDD extends Logging {
    * Create a socket server and a background thread to serve the data in `items`,
    *
    * The socket server can only accept one connection, or close if no connection
-   * in 3 seconds.
+   * in 15 seconds.
    *
    * Once a connection comes in, it tries to serialize all the data in `items`
    * and send them into this connection.
@@ -692,8 +692,8 @@ private[spark] object PythonRDD extends Logging {
    */
   def serveIterator[T](items: Iterator[T], threadName: String): Int = {
     val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
-    // Close the socket if no connection in 3 seconds
-    serverSocket.setSoTimeout(3000)
+    // Close the socket if no connection in 15 seconds
+    serverSocket.setSoTimeout(15000)
 
     new Thread(threadName) {
       setDaemon(true)

http://git-wip-us.apache.org/repos/asf/spark/blob/c06f3f5a/python/pyspark/rdd.py
----------------------------------------------------------------------
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 3325b65..ea993c5 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -127,7 +127,7 @@ def _load_from_socket(port, serializer):
         af, socktype, proto, canonname, sa = res
         sock = socket.socket(af, socktype, proto)
         try:
-            sock.settimeout(3)
+            sock.settimeout(15)
             sock.connect(sa)
         except socket.error:
             sock.close()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org