You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "mridulm (via GitHub)" <gi...@apache.org> on 2023/07/05 04:08:47 UTC

[GitHub] [spark] mridulm commented on a diff in pull request #41821: [SPARK-44272][YARN] Path Inconsistency when Operating statCache within Yarn Client

mridulm commented on code in PR #41821:
URL: https://github.com/apache/spark/pull/41821#discussion_r1252511663


##########
resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala:
##########
@@ -45,6 +45,63 @@ class ClientDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar
     }
   }
 
+  /*
+  * This this a customized ClientDistributedCacheManager to test statCache.
+  * */
+  class CustomizedClientDistributedCacheManager extends ClientDistributedCacheManager {
+    override def addResource(fs: FileSystem, conf: Configuration, destPath: Path,
+      localResources: HashMap[String, LocalResource], resourceType: LocalResourceType,
+      link: String, statCache: Map[URI, FileStatus], appMasterOnly: Boolean): Unit = {
+      statCache.getOrElseUpdate(destPath.toUri(), fs.getFileStatus(destPath))
+    }
+
+    /*
+    * This simulates the isPublic method. It will return true if the result is cached in the
+    *  statCache.
+    * */
+    def isPublicResultCached(uri: URI, statCache: Map[URI, FileStatus]): Boolean = {
+      statCache.get(uri) match {
+        case Some(_) => true
+        case None => false
+      }

Review Comment:
   ```suggestion
         statCache.contains(uri)
   ```



##########
resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala:
##########
@@ -45,6 +45,63 @@ class ClientDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar
     }
   }
 
+  /*
+  * This this a customized ClientDistributedCacheManager to test statCache.
+  * */
+  class CustomizedClientDistributedCacheManager extends ClientDistributedCacheManager {
+    override def addResource(fs: FileSystem, conf: Configuration, destPath: Path,
+      localResources: HashMap[String, LocalResource], resourceType: LocalResourceType,
+      link: String, statCache: Map[URI, FileStatus], appMasterOnly: Boolean): Unit = {
+      statCache.getOrElseUpdate(destPath.toUri(), fs.getFileStatus(destPath))
+    }
+
+    /*
+    * This simulates the isPublic method. It will return true if the result is cached in the
+    *  statCache.
+    * */
+    def isPublicResultCached(uri: URI, statCache: Map[URI, FileStatus]): Boolean = {
+      statCache.get(uri) match {
+        case Some(_) => true
+        case None => false
+      }
+    }
+  }
+
+  test("SPARK-44272: test addResource added FileStatus to statCache and getVisibility can read" +
+    " from statCache") {
+    val distMgr = new CustomizedClientDistributedCacheManager()
+    val fs = mock[FileSystem]
+    val conf = new Configuration()
+    val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing")
+    val localResources = HashMap[String, LocalResource]()
+    val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]()
+    assert(distMgr.isPublicResultCached(destPath.toUri(), statCache) === false)
+    distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link",
+      statCache, false)
+    assert(distMgr.isPublicResultCached(destPath.toUri(), statCache) === true)

Review Comment:
   This test is not checking functionality we have introduced - it is testing `CustomizedClientDistributedCacheManager`, not `ClientDistributedCacheManager `.
   (We can remove the `extends ClientDistributedCacheManager` and the test would still pass)



##########
resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala:
##########
@@ -45,6 +45,63 @@ class ClientDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar
     }
   }
 
+  /*
+  * This this a customized ClientDistributedCacheManager to test statCache.
+  * */
+  class CustomizedClientDistributedCacheManager extends ClientDistributedCacheManager {
+    override def addResource(fs: FileSystem, conf: Configuration, destPath: Path,
+      localResources: HashMap[String, LocalResource], resourceType: LocalResourceType,
+      link: String, statCache: Map[URI, FileStatus], appMasterOnly: Boolean): Unit = {
+      statCache.getOrElseUpdate(destPath.toUri(), fs.getFileStatus(destPath))
+    }
+
+    /*
+    * This simulates the isPublic method. It will return true if the result is cached in the
+    *  statCache.
+    * */
+    def isPublicResultCached(uri: URI, statCache: Map[URI, FileStatus]): Boolean = {
+      statCache.get(uri) match {
+        case Some(_) => true
+        case None => false
+      }
+    }
+  }
+
+  test("SPARK-44272: test addResource added FileStatus to statCache and getVisibility can read" +
+    " from statCache") {
+    val distMgr = new CustomizedClientDistributedCacheManager()
+    val fs = mock[FileSystem]
+    val conf = new Configuration()
+    val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing")
+    val localResources = HashMap[String, LocalResource]()
+    val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]()
+    assert(distMgr.isPublicResultCached(destPath.toUri(), statCache) === false)
+    distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link",
+      statCache, false)
+    assert(distMgr.isPublicResultCached(destPath.toUri(), statCache) === true)
+  }
+
+  test("SPARK-44272: test getParentURI") {
+    val distMgr = new CustomizedClientDistributedCacheManager()

Review Comment:
   Cant this not be `ClientDistributedCacheManager` ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org