Posted to commits@spark.apache.org by gu...@apache.org on 2021/04/16 00:03:06 UTC

[spark] branch branch-3.0 updated (e24115c -> 34d4da5)

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git.


    from e24115c  [SPARK-34834][NETWORK] Fix a potential Netty memory leak in TransportResponseHandler
     new 0033804  Revert "[SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster"
     new 34d4da5  [SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala   | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


[spark] 01/02: Revert "[SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster"

Posted by gu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git

commit 0033804a650d8cc7b54e3e78a148e17efb977f4f
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Fri Apr 16 08:58:45 2021 +0900

    Revert "[SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster"
    
    This reverts commit 7c1177c1d6e104106bdf5c4f26b866a579f0a2c2.
---
 .../java/org/apache/spark/network/TestUtils.java   |  4 +--
 .../spark/deploy/yarn/BaseYarnClusterSuite.scala   | 29 +++-------------------
 2 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java b/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
index c2c5ffa..56a2b80 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
@@ -22,9 +22,7 @@ import java.net.InetAddress;
 public class TestUtils {
   public static String getLocalHost() {
     try {
-      return (System.getenv().containsKey("SPARK_LOCAL_IP"))?
-        System.getenv("SPARK_LOCAL_IP"):
-          InetAddress.getLocalHost().getHostAddress();
+      return InetAddress.getLocalHost().getHostAddress();
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index 460986a..f8ef0d0 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -28,12 +28,7 @@ import scala.concurrent.duration._
 import com.google.common.io.Files
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.server.MiniYARNCluster
-<<<<<<< HEAD
 import org.scalatest.{BeforeAndAfterAll, Matchers}
-=======
-import org.scalactic.source.Position
-import org.scalatest.{BeforeAndAfterAll, Tag}
->>>>>>> a153efa643d ([SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster)
 import org.scalatest.concurrent.Eventually._
 
 import org.apache.spark._
@@ -45,7 +40,6 @@ import org.apache.spark.util.Utils
 
 abstract class BaseYarnClusterSuite
   extends SparkFunSuite with BeforeAndAfterAll with Matchers with Logging {
-  private var isBindSuccessful = true
 
   // log4j configuration for the YARN containers, so that their output is collected
   // by YARN instead of trying to overwrite unit-tests.log.
@@ -69,14 +63,6 @@ abstract class BaseYarnClusterSuite
 
   def newYarnConfig(): YarnConfiguration
 
-  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
-                             (implicit pos: Position): Unit = {
-    super.test(testName, testTags: _*) {
-      assume(isBindSuccessful, "Mini Yarn cluster should be able to bind.")
-      testFun
-    }
-  }
-
   override def beforeAll(): Unit = {
     super.beforeAll()
 
@@ -93,16 +79,9 @@ abstract class BaseYarnClusterSuite
     yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage",
       "100.0")
 
-    try {
-      yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
-      yarnCluster.init(yarnConf)
-      yarnCluster.start()
-    } catch {
-      case e: Throwable if org.apache.commons.lang3.exception.ExceptionUtils.indexOfThrowable(
-          e, classOf[java.net.BindException]) != -1 =>
-        isBindSuccessful = false
-        return
-    }
+    yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
+    yarnCluster.init(yarnConf)
+    yarnCluster.start()
 
     // There's a race in MiniYARNCluster in which start() may return before the RM has updated
     // its address in the configuration. You can see this in the logs by noticing that when
@@ -138,7 +117,7 @@ abstract class BaseYarnClusterSuite
 
   override def afterAll(): Unit = {
     try {
-      if (yarnCluster != null) yarnCluster.stop()
+      yarnCluster.stop()
     } finally {
       super.afterAll()
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


[spark] 02/02: [SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster

Posted by gu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git

commit 34d4da5197b55faa46ef28e517568f84756126c5
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Wed Apr 14 17:13:48 2021 +0800

    [SPARK-35002][YARN][TESTS][FOLLOW-UP] Fix java.net.BindException in MiniYARNCluster
    
    This PR fixes two tests below:
    
    https://github.com/apache/spark/runs/2320161984
    
    ```
    [info] YarnShuffleIntegrationSuite:
    [info] org.apache.spark.deploy.yarn.YarnShuffleIntegrationSuite *** ABORTED *** (228 milliseconds)
    [info]   org.apache.hadoop.yarn.exceptions.YarnRuntimeException: org.apache.hadoop.yarn.webapp.WebAppException: Error starting http server
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.startResourceManager(MiniYARNCluster.java:373)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.access$300(MiniYARNCluster.java:128)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster$ResourceManagerWrapper.serviceStart(MiniYARNCluster.java:503)
    [info]   at org.apache.hadoop.service.AbstractService.start(AbstractService.java:194)
    [info]   at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.serviceStart(MiniYARNCluster.java:322)
    [info]   at org.apache.hadoop.service.AbstractService.start(AbstractService.java:194)
    [info]   at org.apache.spark.deploy.yarn.BaseYarnClusterSuite.beforeAll(BaseYarnClusterSuite.scala:95)
    ...
    [info]   Cause: java.net.BindException: Port in use: fv-az186-831:0
    [info]   at org.apache.hadoop.http.HttpServer2.constructBindException(HttpServer2.java:1231)
    [info]   at org.apache.hadoop.http.HttpServer2.bindForSinglePort(HttpServer2.java:1253)
    [info]   at org.apache.hadoop.http.HttpServer2.openListeners(HttpServer2.java:1316)
    [info]   at org.apache.hadoop.http.HttpServer2.start(HttpServer2.java:1167)
    [info]   at org.apache.hadoop.yarn.webapp.WebApps$Builder.start(WebApps.java:449)
    [info]   at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.startWepApp(ResourceManager.java:1247)
    [info]   at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceStart(ResourceManager.java:1356)
    [info]   at org.apache.hadoop.service.AbstractService.start(AbstractService.java:194)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.startResourceManager(MiniYARNCluster.java:365)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.access$300(MiniYARNCluster.java:128)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster$ResourceManagerWrapper.serviceStart(MiniYARNCluster.java:503)
    [info]   at org.apache.hadoop.service.AbstractService.start(AbstractService.java:194)
    [info]   at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121)
    [info]   at org.apache.hadoop.yarn.server.MiniYARNCluster.serviceStart(MiniYARNCluster.java:322)
    [info]   at org.apache.hadoop.service.AbstractService.start(AbstractService.java:194)
    [info]   at org.apache.spark.deploy.yarn.BaseYarnClusterSuite.beforeAll(BaseYarnClusterSuite.scala:95)
    [info]   at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212)
    [info]   at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
    [info]   at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
    [info]   at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61)
    ...
    ```
    
    https://github.com/apache/spark/runs/2323342094
    
    ```
    [info] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadSecret started
    [error] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadSecret failed: java.lang.AssertionError: Connecting to /10.1.0.161:39895 timed out (120000 ms), took 120.081 sec
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadSecret(ExternalShuffleSecuritySuite.java:85)
    [error]     ...
    [info] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadAppId started
    [error] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadAppId failed: java.lang.AssertionError: Connecting to /10.1.0.198:44633 timed out (120000 ms), took 120.08 sec
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testBadAppId(ExternalShuffleSecuritySuite.java:76)
    [error]     ...
    [info] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testValid started
    [error] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testValid failed: java.io.IOException: Connecting to /10.1.0.119:43575 timed out (120000 ms), took 120.089 sec
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:285)
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:218)
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:230)
    [error]     at org.apache.spark.network.shuffle.ExternalBlockStoreClient.registerWithShuffleServer(ExternalBlockStoreClient.java:211)
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.validate(ExternalShuffleSecuritySuite.java:108)
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testValid(ExternalShuffleSecuritySuite.java:68)
    [error]     ...
    [info] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testEncryption started
    [error] Test org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testEncryption failed: java.io.IOException: Connecting to /10.1.0.248:35271 timed out (120000 ms), took 120.014 sec
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:285)
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:218)
    [error]     at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:230)
    [error]     at org.apache.spark.network.shuffle.ExternalBlockStoreClient.registerWithShuffleServer(ExternalBlockStoreClient.java:211)
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.validate(ExternalShuffleSecuritySuite.java:108)
    [error]     at org.apache.spark.network.shuffle.ExternalShuffleSecuritySuite.testEncryption(ExternalShu
    ```
    
    For the YARN cluster suites, it's difficult to fix the root cause, so this PR skips those tests when the MiniYARNCluster fails to bind.
    For the shuffle-related suites, it uses the local host instead.
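    
    As a rough sketch (not the committed code), the two techniques look like this in Scala.
    `BindAwareSuiteBase`, `startMiniCluster()`, `causedByBindException()` and `localHost()` are
    illustrative names only; the hand-rolled cause-chain walk stands in for the
    `ExceptionUtils.indexOfThrowable` call in the actual diff below, and the ScalaTest 3.0.x
    API used on branch-3.0 is assumed.
    
    ```scala
    import java.net.{BindException, InetAddress}
    
    import org.scalactic.source.Position
    import org.scalatest.{BeforeAndAfterAll, FunSuite, Tag}
    
    object LocalHostSketch {
      // Prefer SPARK_LOCAL_IP when it is set (the TestUtils change below), so the
      // shuffle tests connect to an address the CI host can actually reach.
      def localHost(): String =
        sys.env.getOrElse("SPARK_LOCAL_IP", InetAddress.getLocalHost.getHostAddress)
    }
    
    abstract class BindAwareSuiteBase extends FunSuite with BeforeAndAfterAll {
      // Flipped to false when the mini cluster cannot bind during beforeAll().
      protected var isBindSuccessful = true
    
      // Hypothetical hook that would start the MiniYARNCluster.
      protected def startMiniCluster(): Unit
    
      override def beforeAll(): Unit = {
        super.beforeAll()
        try {
          startMiniCluster()
        } catch {
          // Only swallow failures caused by a BindException; anything else still fails.
          case e: Throwable if causedByBindException(e) => isBindSuccessful = false
        }
      }
    
      // Every test first assumes the bind succeeded; when it did not, ScalaTest
      // cancels (skips) the test instead of failing it.
      override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
                                 (implicit pos: Position): Unit = {
        super.test(testName, testTags: _*) {
          assume(isBindSuccessful, "Mini Yarn cluster should be able to bind.")
          testFun
        }
      }
    
      // Walk the cause chain looking for a BindException.
      @annotation.tailrec
      private def causedByBindException(t: Throwable): Boolean = t match {
        case null => false
        case _: BindException => true
        case other => causedByBindException(other.getCause)
      }
    }
    ```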
    
    To make the tests stable.
    
    No, dev-only.
    
    It's tested in GitHub Actions: https://github.com/HyukjinKwon/spark/runs/2340210765
    
    Closes #32126 from HyukjinKwon/SPARK-35002-followup.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: Yuming Wang <yu...@ebay.com>
    (cherry picked from commit a153efa643dcb1d8e6c2242846b3db0b2be39ae7)
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../java/org/apache/spark/network/TestUtils.java   |  4 +++-
 .../spark/deploy/yarn/BaseYarnClusterSuite.scala   | 27 ++++++++++++++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java b/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
index 56a2b80..c2c5ffa 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/TestUtils.java
@@ -22,7 +22,9 @@ import java.net.InetAddress;
 public class TestUtils {
   public static String getLocalHost() {
     try {
-      return InetAddress.getLocalHost().getHostAddress();
+      return (System.getenv().containsKey("SPARK_LOCAL_IP"))?
+        System.getenv("SPARK_LOCAL_IP"):
+          InetAddress.getLocalHost().getHostAddress();
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
index f8ef0d0..64ae21b 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala
@@ -28,7 +28,8 @@ import scala.concurrent.duration._
 import com.google.common.io.Files
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.server.MiniYARNCluster
-import org.scalatest.{BeforeAndAfterAll, Matchers}
+import org.scalactic.source.Position
+import org.scalatest.{BeforeAndAfterAll, Matchers, Tag}
 import org.scalatest.concurrent.Eventually._
 
 import org.apache.spark._
@@ -40,6 +41,7 @@ import org.apache.spark.util.Utils
 
 abstract class BaseYarnClusterSuite
   extends SparkFunSuite with BeforeAndAfterAll with Matchers with Logging {
+  private var isBindSuccessful = true
 
   // log4j configuration for the YARN containers, so that their output is collected
   // by YARN instead of trying to overwrite unit-tests.log.
@@ -63,6 +65,14 @@ abstract class BaseYarnClusterSuite
 
   def newYarnConfig(): YarnConfiguration
 
+  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
+                             (implicit pos: Position): Unit = {
+    super.test(testName, testTags: _*) {
+      assume(isBindSuccessful, "Mini Yarn cluster should be able to bind.")
+      testFun
+    }
+  }
+
   override def beforeAll(): Unit = {
     super.beforeAll()
 
@@ -79,9 +89,16 @@ abstract class BaseYarnClusterSuite
     yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage",
       "100.0")
 
-    yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
-    yarnCluster.init(yarnConf)
-    yarnCluster.start()
+    try {
+      yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1)
+      yarnCluster.init(yarnConf)
+      yarnCluster.start()
+    } catch {
+      case e: Throwable if org.apache.commons.lang3.exception.ExceptionUtils.indexOfThrowable(
+          e, classOf[java.net.BindException]) != -1 =>
+        isBindSuccessful = false
+        return
+    }
 
     // There's a race in MiniYARNCluster in which start() may return before the RM has updated
     // its address in the configuration. You can see this in the logs by noticing that when
@@ -117,7 +134,7 @@ abstract class BaseYarnClusterSuite
 
   override def afterAll(): Unit = {
     try {
-      yarnCluster.stop()
+      if (yarnCluster != null) yarnCluster.stop()
     } finally {
       super.afterAll()
     }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org