You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by mr...@apache.org on 2023/09/07 04:16:27 UTC

[spark] branch master updated: [SPARK-44845][YARN][DEPLOY] Fix file system uri comparison function

This is an automated email from the ASF dual-hosted git repository.

mridulm80 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 622bbf2e292 [SPARK-44845][YARN][DEPLOY] Fix file system uri comparison function
622bbf2e292 is described below

commit 622bbf2e29262c34021cb38c4c70f8eed258999b
Author: zekai-li <58...@users.noreply.github.com>
AuthorDate: Wed Sep 6 23:15:53 2023 -0500

    [SPARK-44845][YARN][DEPLOY] Fix file system uri comparison function
    
    ### What changes were proposed in this pull request?
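    In org.apache.spark.deploy.yarn.Client#compareUri, compare only the user info of the two URIs (URI.getUserInfo) instead of their full authority (URI.getAuthority), and update the ClientSuite URI match/unmatch cases accordingly.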
    
    ### Why are the changes needed?
    
    In the org.apache.spark.deploy.yarn.Client#compareUri method, hdfs://hadoop81:8020 and hdfs://192.168.0.81:8020 are regarded as different file systems, even though hadoop81 resolves to 192.168.0.81. The cause is that the previous PR made URIs with different user information count as different file systems, but used URI.getAuthority to compare the user information. The authority also contains the host, so the authorities of the two URIs above always differ. To determine whether the user a [...]
    
    The previous PR and issue links:
    https://issues.apache.org/jira/browse/SPARK-22587
    
    https://github.com/apache/spark/pull/19885
    
    ### Does this PR introduce _any_ user-facing change?
    
    ### How was this patch tested?
    
    Closes #42529 from zekai-li/master.
    
    Authored-by: zekai-li <58...@users.noreply.github.com>
    Signed-off-by: Mridul Muralidharan <mridul<at>gmail.com>
---
 .../yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala  | 7 ++++---
 .../src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala  | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8257a08fd14..a675054b447 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1618,9 +1618,10 @@ private[spark] object Client extends Logging {
       return false
     }
 
-    val srcAuthority = srcUri.getAuthority()
-    val dstAuthority = dstUri.getAuthority()
-    if (srcAuthority != null && !srcAuthority.equalsIgnoreCase(dstAuthority)) {
+    val srcUserInfo = Option(srcUri.getUserInfo).getOrElse("")
+    val dstUserInfo = Option(dstUri.getUserInfo).getOrElse("")
+
+    if (!srcUserInfo.equals(dstUserInfo)) {
       return false
     }
 
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
index b7fb409ebc3..8802c59e78b 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
@@ -675,7 +675,8 @@ class ClientSuite extends SparkFunSuite
     ("files URI match test2", "file:///c:file1", "file://c:file2"),
     ("files URI match test3", "file://host/file1", "file://host/file2"),
     ("wasb URI match test", "wasb://bucket1@user", "wasb://bucket1@user/"),
-    ("hdfs URI match test", "hdfs:/path1", "hdfs:/path1")
+    ("hdfs URI match test1", "hdfs:/path1", "hdfs:/path1"),
+    ("hdfs URI match test2", "hdfs://localhost:8080", "hdfs://127.0.0.1:8080")
   )
 
   matching.foreach { t =>
@@ -691,7 +692,7 @@ class ClientSuite extends SparkFunSuite
     ("files URI unmatch test3", "file://host/file1", "file://host2/file2"),
     ("wasb URI unmatch test1", "wasb://bucket1@user", "wasb://bucket2@user/"),
     ("wasb URI unmatch test2", "wasb://bucket1@user", "wasb://bucket1@user2/"),
-    ("s3 URI unmatch test", "s3a://user@pass:bucket1/", "s3a://user2@pass2:bucket1/"),
+    ("s3 URI unmatch test", "s3a://user:pass@bucket1/", "s3a://user2:pass2@bucket1/"),
     ("hdfs URI unmatch test1", "hdfs://namenode1/path1", "hdfs://namenode1:8080/path2"),
     ("hdfs URI unmatch test2", "hdfs://namenode1:8020/path1", "hdfs://namenode1:8080/path2")
   )


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org