You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/07/23 19:31:52 UTC

git commit: [SPARK-2609] Log thread ID when spilling ExternalAppendOnlyMap

Repository: spark
Updated Branches:
  refs/heads/master 4c7243e10 -> 25921110f


[SPARK-2609] Log thread ID when spilling ExternalAppendOnlyMap

It's useful to know whether one thread is constantly spilling or multiple threads are spilling relatively infrequently. Right now everything looks a little jumbled and we can't tell which lines belong to the same thread. For instance:

```
06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (194 times so far)
06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (198 times so far)
06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (198 times so far)
06:14:37 ExternalAppendOnlyMap: Spilling in-memory map of 10 MB to disk (197 times so far)
06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 9 MB to disk (45 times so far)
06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 23 MB to disk (198 times so far)
06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 38 MB to disk (25 times so far)
06:14:38 ExternalAppendOnlyMap: Spilling in-memory map of 161 MB to disk (25 times so far)
06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 0 MB to disk (199 times so far)
06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (166 times so far)
06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (199 times so far)
06:14:39 ExternalAppendOnlyMap: Spilling in-memory map of 4 MB to disk (200 times so far)
```

Author: Andrew Or <an...@gmail.com>

Closes #1517 from andrewor14/external-log and squashes the following commits:

90e48bb [Andrew Or] Log thread ID when spilling


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/25921110
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/25921110
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/25921110

Branch: refs/heads/master
Commit: 25921110fcd5afe568bf0d25fccd232787af7911
Parents: 4c7243e
Author: Andrew Or <an...@gmail.com>
Authored: Wed Jul 23 10:31:45 2014 -0700
Committer: Matei Zaharia <ma...@databricks.com>
Committed: Wed Jul 23 10:31:45 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/util/collection/ExternalAppendOnlyMap.scala   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/25921110/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 71ab2a3..be8f652 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -106,6 +106,7 @@ class ExternalAppendOnlyMap[K, V, C](
   private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
   private val keyComparator = new HashComparator[K]
   private val ser = serializer.newInstance()
+  private val threadId = Thread.currentThread().getId
 
   /**
    * Insert the given key and value into the map.
@@ -128,7 +129,6 @@ class ExternalAppendOnlyMap[K, V, C](
       // Atomically check whether there is sufficient memory in the global pool for
       // this map to grow and, if possible, allocate the required amount
       shuffleMemoryMap.synchronized {
-        val threadId = Thread.currentThread().getId
         val previouslyOccupiedMemory = shuffleMemoryMap.get(threadId)
         val availableMemory = maxMemoryThreshold -
           (shuffleMemoryMap.values.sum - previouslyOccupiedMemory.getOrElse(0L))
@@ -153,8 +153,8 @@ class ExternalAppendOnlyMap[K, V, C](
    */
   private def spill(mapSize: Long) {
     spillCount += 1
-    logWarning("Spilling in-memory map of %d MB to disk (%d time%s so far)"
-      .format(mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else ""))
+    logWarning("Thread %d spilling in-memory map of %d MB to disk (%d time%s so far)"
+      .format(threadId, mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else ""))
     val (blockId, file) = diskBlockManager.createTempBlock()
     var writer = blockManager.getDiskWriter(blockId, file, serializer, fileBufferSize)
     var objectsWritten = 0