Posted to commits@spark.apache.org by an...@apache.org on 2015/05/09 02:13:59 UTC

spark git commit: [SPARK-6955] Perform port retries at NettyBlockTransferService level

Repository: spark
Updated Branches:
  refs/heads/master 1c78f6866 -> ffdc40ce7


[SPARK-6955] Perform port retries at NettyBlockTransferService level

Currently we perform port retries at the TransportServer level, but this behavior is not specified by the TransportContext API, and it has further-reaching impacts such as causing undesirable behavior for the YARN and Standalone shuffle services.
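
For context, the caller-side retry pattern that this change relies on (via Utils.startServiceOnPort in the diff below) looks roughly like the following sketch. This is a simplified illustration, not Spark's actual implementation: the maxRetries default, the BindException handling, and the ServerSocket stand-in for TransportServer are assumptions made for demonstration only.

import java.net.{BindException, ServerSocket}

object PortRetrySketch {
  // Simplified sketch: the caller supplies a startService function that binds
  // to a given port and returns (service, boundPort); the helper retries on
  // successive ports instead of baking retries into the server itself.
  def startServiceOnPort[T](
      startPort: Int,
      startService: Int => (T, Int),
      maxRetries: Int = 16): (T, Int) = {
    for (attempt <- 0 to maxRetries) {
      // Port 0 asks the OS for an ephemeral port, so never increment it.
      val tryPort = if (startPort == 0) 0 else startPort + attempt
      try {
        return startService(tryPort)
      } catch {
        case _: BindException if attempt < maxRetries =>
          println(s"Port $tryPort in use, trying ${tryPort + 1}")
      }
    }
    throw new IllegalStateException(s"Could not bind after $maxRetries retries")
  }

  def main(args: Array[String]): Unit = {
    // Example service: a plain ServerSocket standing in for TransportServer.
    val (socket, boundPort) = startServiceOnPort(17634, { port: Int =>
      val s = new ServerSocket(port)
      (s, s.getLocalPort)
    })
    println(s"Bound on port $boundPort")
    socket.close()
  }
}

Because the retry loop lives with the caller, a component that must bind to exactly the configured port (such as the YARN or Standalone shuffle service) can simply not use it, which is the point of this change.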

Author: Aaron Davidson <aa...@databricks.com>

Closes #5575 from aarondav/port-bind and squashes the following commits:

3c2d6ed [Aaron Davidson] Oops, never do it.
a5d9432 [Aaron Davidson] Remove shouldHostShuffleServiceIfEnabled
e901eb2 [Aaron Davidson] fix local-cluster mode for ExternalShuffleServiceSuite
59e5e38 [Aaron Davidson] [SPARK-6955] Perform port retries at NettyBlockTransferService level


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ffdc40ce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ffdc40ce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ffdc40ce

Branch: refs/heads/master
Commit: ffdc40ce7a799f2564f57b958d0f32f1d1636488
Parents: 1c78f68
Author: Aaron Davidson <aa...@databricks.com>
Authored: Fri May 8 17:13:55 2015 -0700
Committer: Andrew Or <an...@databricks.com>
Committed: Fri May 8 17:13:55 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/deploy/LocalSparkCluster.scala |  4 +-
 .../netty/NettyBlockTransferService.scala       | 14 +++-
 .../netty/NettyBlockTransferServiceSuite.scala  | 78 ++++++++++++++++++++
 .../spark/network/server/TransportServer.java   | 45 +++--------
 4 files changed, 102 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ffdc40ce/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
index f0e77c2..860e1a2 100644
--- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala
@@ -48,7 +48,9 @@ class LocalSparkCluster(
     logInfo("Starting a local Spark cluster with " + numWorkers + " workers.")
 
     // Disable REST server on Master in this mode unless otherwise specified
-    val _conf = conf.clone().setIfMissing("spark.master.rest.enabled", "false")
+    val _conf = conf.clone()
+      .setIfMissing("spark.master.rest.enabled", "false")
+      .set("spark.shuffle.service.enabled", "false")
 
     /* Start the Master */
     val (masterSystem, masterPort, _, _) = Master.startSystemAndActor(localHostname, 0, 0, _conf)

http://git-wip-us.apache.org/repos/asf/spark/blob/ffdc40ce/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
index 6181c0e..d650d5f 100644
--- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
+++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
@@ -59,12 +59,22 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage
     }
     transportContext = new TransportContext(transportConf, rpcHandler)
     clientFactory = transportContext.createClientFactory(clientBootstrap.toList)
-    server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0),
-      serverBootstrap.toList)
+    server = createServer(serverBootstrap.toList)
     appId = conf.getAppId
     logInfo("Server created on " + server.getPort)
   }
 
+  /** Creates and binds the TransportServer, possibly trying multiple ports. */
+  private def createServer(bootstraps: List[TransportServerBootstrap]): TransportServer = {
+    def startService(port: Int): (TransportServer, Int) = {
+      val server = transportContext.createServer(port, bootstraps)
+      (server, server.getPort)
+    }
+
+    val portToTry = conf.getInt("spark.blockManager.port", 0)
+    Utils.startServiceOnPort(portToTry, startService, conf, getClass.getName)._1
+  }
+
   override def fetchBlocks(
       host: String,
       port: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/ffdc40ce/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala
new file mode 100644
index 0000000..a41f8b7
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.netty
+
+import org.apache.spark.network.BlockDataManager
+import org.apache.spark.{SecurityManager, SparkConf}
+import org.mockito.Mockito.mock
+import org.scalatest._
+
+class NettyBlockTransferServiceSuite extends FunSuite with BeforeAndAfterEach with ShouldMatchers {
+  private var service0: NettyBlockTransferService = _
+  private var service1: NettyBlockTransferService = _
+
+  override def afterEach() {
+    if (service0 != null) {
+      service0.close()
+      service0 = null
+    }
+
+    if (service1 != null) {
+      service1.close()
+      service1 = null
+    }
+  }
+
+  test("can bind to a random port") {
+    service0 = createService(port = 0)
+    service0.port should not be 0
+  }
+
+  test("can bind to two random ports") {
+    service0 = createService(port = 0)
+    service1 = createService(port = 0)
+    service0.port should not be service1.port
+  }
+
+  test("can bind to a specific port") {
+    val port = 17634
+    service0 = createService(port)
+    service0.port should be >= port
+    service0.port should be <= (port + 10) // avoid testing equality in case of simultaneous tests
+  }
+
+  test("can bind to a specific port twice and the second increments") {
+    val port = 17634
+    service0 = createService(port)
+    service1 = createService(port)
+    service0.port should be >= port
+    service0.port should be <= (port + 10)
+    service1.port should be (service0.port + 1)
+  }
+
+  private def createService(port: Int): NettyBlockTransferService = {
+    val conf = new SparkConf()
+      .set("spark.app.id", s"test-${getClass.getName}")
+      .set("spark.blockManager.port", port.toString)
+    val securityManager = new SecurityManager(conf)
+    val blockDataManager = mock(classOf[BlockDataManager])
+    val service = new NettyBlockTransferService(conf, securityManager, numCores = 1)
+    service.init(blockDataManager)
+    service
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/ffdc40ce/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java
----------------------------------------------------------------------
diff --git a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java
index 941ef95..f4fadb1 100644
--- a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java
+++ b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java
@@ -31,6 +31,7 @@ import io.netty.channel.ChannelInitializer;
 import io.netty.channel.ChannelOption;
 import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
+import org.apache.spark.network.util.JavaUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,7 +66,12 @@ public class TransportServer implements Closeable {
     this.appRpcHandler = appRpcHandler;
     this.bootstraps = Lists.newArrayList(Preconditions.checkNotNull(bootstraps));
 
-    init(portToBind);
+    try {
+      init(portToBind);
+    } catch (RuntimeException e) {
+      JavaUtils.closeQuietly(this);
+      throw e;
+    }
   }
 
   public int getPort() {
@@ -114,7 +120,8 @@ public class TransportServer implements Closeable {
       }
     });
 
-    bindRightPort(portToBind);
+    channelFuture = bootstrap.bind(new InetSocketAddress(portToBind));
+    channelFuture.syncUninterruptibly();
 
     port = ((InetSocketAddress) channelFuture.channel().localAddress()).getPort();
     logger.debug("Shuffle server started on port :" + port);
@@ -135,38 +142,4 @@ public class TransportServer implements Closeable {
     }
     bootstrap = null;
   }
-
-  /**
-   * Attempt to bind to the specified port up to a fixed number of retries.
-   * If all attempts fail after the max number of retries, exit.
-   */
-  private void bindRightPort(int portToBind) {
-    int maxPortRetries = conf.portMaxRetries();
-
-    for (int i = 0; i <= maxPortRetries; i++) {
-      int tryPort = -1;
-      if (0 == portToBind) {
-        // Do not increment port if tryPort is 0, which is treated as a special port
-        tryPort = 0;
-      } else {
-        // If the new port wraps around, do not try a privilege port
-        tryPort = ((portToBind + i - 1024) % (65536 - 1024)) + 1024;
-      }
-      try {
-        channelFuture = bootstrap.bind(new InetSocketAddress(tryPort));
-        channelFuture.syncUninterruptibly();
-        return;
-      } catch (Exception e) {
-        logger.warn("Netty service could not bind on port " + tryPort +
-          ". Attempting the next port.");
-        if (i >= maxPortRetries) {
-          logger.error(e.getMessage() + ": Netty server failed after "
-            + maxPortRetries + " retries.");
-
-          // If it can't find a right port, it should exit directly.
-          System.exit(-1);
-        }
-      }
-    }
-  }
 }

