You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ad...@apache.org on 2014/05/27 08:17:46 UTC

git commit: SPARK-1932: Fix race conditions in onReceiveCallback and cachedPeers

Repository: spark
Updated Branches:
  refs/heads/master 90e281b55 -> 549830b0d


SPARK-1932: Fix race conditions in onReceiveCallback and cachedPeers

`var cachedPeers: Seq[BlockManagerId] = null` is used in `def replicate(blockId: BlockId, data: ByteBuffer, level: StorageLevel)` without proper protection.

There are two places that call `replicate(blockId, bytesAfterPut, level)`:
* https://github.com/apache/spark/blob/17f3075bc4aa8cbed165f7b367f70e84b1bc8db9/core/src/main/scala/org/apache/spark/storage/BlockManager.scala#L644 runs in `connectionManager.futureExecContext`
* https://github.com/apache/spark/blob/17f3075bc4aa8cbed165f7b367f70e84b1bc8db9/core/src/main/scala/org/apache/spark/storage/BlockManager.scala#L752 `doPut` runs in `connectionManager.handleMessageExecutor`. `org.apache.spark.storage.BlockManagerWorker` calls `blockManager.putBytes` in `connectionManager.handleMessageExecutor`.

Because they run in different `Executor`s, there is a race condition: the memory pointed to by `cachedPeers` may not be correct even if `cachedPeers != null`.

The race condition of `onReceiveCallback` is that it's set in `BlockManagerWorker` but read in a different thread in `ConnectionManager.handleMessageExecutor`.

Author: zsxwing <zs...@gmail.com>

Closes #887 from zsxwing/SPARK-1932 and squashes the following commits:

524f69c [zsxwing] SPARK-1932: Fix race conditions in onReceiveCallback and cachedPeers


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/549830b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/549830b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/549830b0

Branch: refs/heads/master
Commit: 549830b0db2c8b069391224f3a73bb0d7f397f71
Parents: 90e281b
Author: zsxwing <zs...@gmail.com>
Authored: Mon May 26 23:17:39 2014 -0700
Committer: Aaron Davidson <aa...@databricks.com>
Committed: Mon May 26 23:17:39 2014 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/network/ConnectionManager.scala   | 3 ++-
 core/src/main/scala/org/apache/spark/storage/BlockManager.scala   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/549830b0/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
index dcbbc18..5dd5fd0 100644
--- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
+++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
@@ -93,7 +93,8 @@ private[spark] class ConnectionManager(port: Int, conf: SparkConf,
   implicit val futureExecContext = ExecutionContext.fromExecutor(
     Utils.newDaemonCachedThreadPool("Connection manager future execution context"))
 
-  private var onReceiveCallback: (BufferMessage, ConnectionManagerId) => Option[Message]= null
+  @volatile
+  private var onReceiveCallback: (BufferMessage, ConnectionManagerId) => Option[Message] = null
 
   private val authEnabled = securityManager.isAuthenticationEnabled()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/549830b0/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index 6534095..6e45008 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -772,7 +772,7 @@ private[spark] class BlockManager(
   /**
    * Replicate block to another node.
    */
-  var cachedPeers: Seq[BlockManagerId] = null
+  @volatile var cachedPeers: Seq[BlockManagerId] = null
   private def replicate(blockId: BlockId, data: ByteBuffer, level: StorageLevel) {
     val tLevel = StorageLevel(
       level.useDisk, level.useMemory, level.useOffHeap, level.deserialized, 1)