You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by zh...@apache.org on 2017/03/28 19:47:57 UTC

[16/50] [abbrv] tez git commit: TEZ-3634. reduce the default buffer sizes in PipelinedSorter by a small amount. (sseth)

TEZ-3634. reduce the default buffer sizes in PipelinedSorter by a small
amount. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/2d8090e9
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/2d8090e9
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/2d8090e9

Branch: refs/heads/TEZ-1190
Commit: 2d8090e9cffa1f3643c93dd13507cea0a7a092a4
Parents: fc0897b
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Feb 22 17:11:21 2017 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Feb 22 17:11:21 2017 -0800

----------------------------------------------------------------------
 CHANGES.txt                                       |  1 +
 .../library/common/sort/impl/PipelinedSorter.java |  9 +++++++--
 .../common/sort/impl/TestPipelinedSorter.java     | 18 +++++++++---------
 3 files changed, 17 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/2d8090e9/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5427e12..a1e23f4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,7 @@ INCOMPATIBLE CHANGES
 
 ALL CHANGES:
 
+  TEZ-3634. reduce the default buffer sizes in PipelinedSorter by a small amount.
   TEZ-3627. Use queue name available in RegisterApplicationMasterResponse for publishing to ATS.
   TEZ-3610. TEZ UI 0.7 0.9 compatibility for url query params and tez-app sub-routes
   TEZ-3625. Dag.getVertex should obtain a readlock.

http://git-wip-us.apache.org/repos/asf/tez/blob/2d8090e9/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
index 4258fff..3d4bfbe 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
@@ -132,7 +132,8 @@ public class PipelinedSorter extends ExternalSorter {
        * When lazy-allocation is enabled, framework takes care of auto
        * allocating memory on need basis. Desirable block size is set to 256MB
        */
-      MIN_BLOCK_SIZE = 256 << 20; //256 MB
+      //256 MB - 64 bytes. See comment for the 32MB allocation.
+      MIN_BLOCK_SIZE = ((256 << 20) - 64);
     } else {
       int minBlockSize = conf.getInt(TezRuntimeConfiguration
               .TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB,
@@ -267,7 +268,11 @@ public class PipelinedSorter extends ExternalSorter {
      */
     if (lazyAllocateMem) {
       if (buffers == null || buffers.isEmpty()) {
-        return 32 << 20; //32 MB
+        //32 MB - 64 bytes
+        // These buffers end up occupying 33554456 (32M + 24) bytes.
+        // On large JVMs (64G+), with G1Gc - the region size maxes out at
+        // 32M. Without the -64, this structure would end up using 2 regions.
+        return ((32 << 20) - 64);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/tez/blob/2d8090e9/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
index 80e7b14..d295640 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
@@ -601,13 +601,13 @@ public class TestPipelinedSorter {
         numOutputs, (128l << 20));
     assertTrue("Expected 1 sort buffers. current len=" + sorter.buffers.size(),
         sorter.buffers.size() == 1);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
     writeData(sorter, 100, 1024*1024, false); //100 1 MB KV. Will spill
 
     //Now it should have created 2 buffers, 32 & 96 MB buffers.
     assertTrue(sorter.buffers.size() == 2);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
-    assertTrue(sorter.buffers.get(1).capacity() == 96 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
+    assertTrue(sorter.buffers.get(1).capacity() == 96 * 1024 * 1024 + 64);
     closeSorter(sorter);
     verifyCounters(sorter, outputContext);
 
@@ -619,12 +619,12 @@ public class TestPipelinedSorter {
         .TEZ_RUNTIME_PIPELINED_SORTER_LAZY_ALLOCATE_MEMORY, true);
     sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, (300l << 20));
     assertTrue(sorter.buffers.size() == 1);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
 
     writeData(sorter, 50, 1024*1024, false); //50 1 MB KV to allocate 2nd buf
     assertTrue(sorter.buffers.size() == 2);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
-    assertTrue(sorter.buffers.get(1).capacity() == 268 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
+    assertTrue(sorter.buffers.get(1).capacity() == 268 * 1024 * 1024 + 64);
 
     //48 MB. Do not pre-allocate.
     // Get 32 MB buffer first invariably and proceed with the rest.
@@ -635,13 +635,13 @@ public class TestPipelinedSorter {
         numOutputs, (48l << 20));
     assertTrue("Expected 1 sort buffers. current len=" + sorter.buffers.size(),
         sorter.buffers.size() == 1);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
     writeData(sorter, 20, 1024*1024, false); //20 1 MB KV. Will spill
 
     //Now it should have created 2 buffers, 32 & 16 MB buffers.
     assertTrue(sorter.buffers.size() == 2);
-    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024);
-    assertTrue(sorter.buffers.get(1).capacity() == 16 * 1024 * 1024);
+    assertTrue(sorter.buffers.get(0).capacity() == 32 * 1024 * 1024 - 64);
+    assertTrue(sorter.buffers.get(1).capacity() == 16 * 1024 * 1024 + 64);
     closeSorter(sorter);
   }