You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2013/11/28 03:24:49 UTC

[1/2] git commit: add http timeout for httpbroadcast

Updated Branches:
  refs/heads/master fb6875dd5 -> 743a31a7c


add http timeout for httpbroadcast


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/db998a6e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/db998a6e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/db998a6e

Branch: refs/heads/master
Commit: db998a6e14389768f93b1fdd6be7847d5f7604fd
Parents: 18d6df0
Author: haitao.yao <ya...@gmail.com>
Authored: Tue Nov 26 18:23:48 2013 +0800
Committer: haitao.yao <ya...@gmail.com>
Committed: Tue Nov 26 18:23:48 2013 +0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/broadcast/HttpBroadcast.scala  | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/db998a6e/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
index 609464e..47db720 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
@@ -19,6 +19,7 @@ package org.apache.spark.broadcast
 
 import java.io.{File, FileOutputStream, ObjectInputStream, OutputStream}
 import java.net.URL
+import java.util.concurrent.TimeUnit
 
 import it.unimi.dsi.fastutil.io.FastBufferedInputStream
 import it.unimi.dsi.fastutil.io.FastBufferedOutputStream
@@ -83,6 +84,8 @@ private object HttpBroadcast extends Logging {
   private val files = new TimeStampedHashSet[String]
   private val cleaner = new MetadataCleaner(MetadataCleanerType.HTTP_BROADCAST, cleanup)
 
+  private val httpReadTimeout = TimeUnit.MILLISECONDS.convert(5,TimeUnit.MINUTES).toInt
+
   private lazy val compressionCodec = CompressionCodec.createCodec()
 
   def initialize(isDriver: Boolean) {
@@ -138,10 +141,13 @@ private object HttpBroadcast extends Logging {
   def read[T](id: Long): T = {
     val url = serverUri + "/" + BroadcastBlockId(id).name
     val in = {
+      val httpConnection = new URL(url).openConnection()
+      httpConnection.setReadTimeout(httpReadTimeout)
+      val inputStream = httpConnection.getInputStream()
       if (compress) {
-        compressionCodec.compressedInputStream(new URL(url).openStream())
+        compressionCodec.compressedInputStream(inputStream)
       } else {
-        new FastBufferedInputStream(new URL(url).openStream(), bufferSize)
+        new FastBufferedInputStream(inputStream, bufferSize)
       }
     }
     val ser = SparkEnv.get.serializer.newInstance()


[2/2] git commit: Merge pull request #210 from haitaoyao/http-timeout

Posted by ma...@apache.org.
Merge pull request #210 from haitaoyao/http-timeout

add http timeout for httpbroadcast

While pulling task bytecode from the HttpBroadcast server, no timeout value is set. This may cause the Spark executor code to hang, leaving other tasks in the same executor process waiting for the lock. I have encountered this issue in my cluster. Here's the stack trace I captured: https://gist.github.com/haitaoyao/7655830

So add a timeout value to ensure the task fails fast.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/743a31a7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/743a31a7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/743a31a7

Branch: refs/heads/master
Commit: 743a31a7ca4421cbbd5b615b773997a06a7ab4ee
Parents: fb6875d db998a6
Author: Matei Zaharia <ma...@eecs.berkeley.edu>
Authored: Wed Nov 27 18:24:39 2013 -0800
Committer: Matei Zaharia <ma...@eecs.berkeley.edu>
Committed: Wed Nov 27 18:24:39 2013 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/broadcast/HttpBroadcast.scala  | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------