You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by an...@apache.org on 2018/05/16 11:55:19 UTC

oozie git commit: OOZIE-3250 Reduce heap waste by reducing duplicate byte[] count (andras.piros)

Repository: oozie
Updated Branches:
  refs/heads/master 61c646c33 -> 438ba6df7


OOZIE-3250 Reduce heap waste by reducing duplicate byte[] count (andras.piros)


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/438ba6df
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/438ba6df
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/438ba6df

Branch: refs/heads/master
Commit: 438ba6df78fcecf92553db875c1e4624d4555d1f
Parents: 61c646c
Author: Andras Piros <an...@cloudera.com>
Authored: Wed May 16 13:47:28 2018 +0200
Committer: Andras Piros <an...@cloudera.com>
Committed: Wed May 16 13:54:42 2018 +0200

----------------------------------------------------------------------
 .../main/java/org/apache/oozie/BinaryBlob.java  | 16 +++---
 .../main/java/org/apache/oozie/StringBlob.java  |  7 +--
 .../org/apache/oozie/util/ByteArrayUtils.java   | 48 ++++++++++++++++++
 .../apache/oozie/util/TestByteArrayUtils.java   | 53 ++++++++++++++++++++
 release-log.txt                                 |  1 +
 5 files changed, 115 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/BinaryBlob.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/BinaryBlob.java b/core/src/main/java/org/apache/oozie/BinaryBlob.java
index 69bf67e..36a0e60 100644
--- a/core/src/main/java/org/apache/oozie/BinaryBlob.java
+++ b/core/src/main/java/org/apache/oozie/BinaryBlob.java
@@ -21,8 +21,10 @@ package org.apache.oozie;
 import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.IOException;
+
 import org.apache.oozie.compression.CodecFactory;
 import org.apache.oozie.compression.CompressionCodec;
+import org.apache.oozie.util.ByteArrayUtils;
 
 /**
  * BinaryBlob to maintain compress and uncompressed data
@@ -40,11 +42,11 @@ public class BinaryBlob {
      */
     public BinaryBlob(byte[] byteArray, boolean isUncompressed) {
         if (isUncompressed) {
-            this.bytes = byteArray;
+            this.bytes = ByteArrayUtils.weakIntern(byteArray);
             this.rawBlob = null;
         }
         else {
-            this.rawBlob = byteArray;
+            this.rawBlob = ByteArrayUtils.weakIntern(byteArray);
         }
     }
 
@@ -54,7 +56,7 @@ public class BinaryBlob {
      * @param byteArray the byte array
      */
     public void setBytes(byte[] byteArray) {
-        this.bytes = byteArray;
+        this.bytes = ByteArrayUtils.weakIntern(byteArray);
         this.rawBlob = null;
     }
 
@@ -74,10 +76,10 @@ public class BinaryBlob {
             DataInputStream dais = new DataInputStream(new ByteArrayInputStream(rawBlob));
             CompressionCodec codec = CodecFactory.getDeCompressionCodec(dais);
             if (codec != null) {
-                bytes = codec.decompressToBytes(dais);
+                bytes = ByteArrayUtils.weakIntern(codec.decompressToBytes(dais));
             }
             else {
-                bytes = rawBlob;
+                bytes = ByteArrayUtils.weakIntern(rawBlob);
             }
             dais.close();
         }
@@ -104,14 +106,14 @@ public class BinaryBlob {
         if (CodecFactory.isCompressionEnabled()) {
             byte[] headerBytes = CodecFactory.getHeaderBytes();
             try {
-                rawBlob = CodecFactory.getCompressionCodec().compressBytes(headerBytes, bytes);
+                rawBlob = ByteArrayUtils.weakIntern(CodecFactory.getCompressionCodec().compressBytes(headerBytes, bytes));
             }
             catch (IOException ex) {
                 throw new RuntimeException(ex);
             }
         }
         else {
-            rawBlob = bytes;
+            rawBlob = ByteArrayUtils.weakIntern(bytes);
         }
         return rawBlob;
     }

http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/StringBlob.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/StringBlob.java b/core/src/main/java/org/apache/oozie/StringBlob.java
index b453f09..6c77601 100644
--- a/core/src/main/java/org/apache/oozie/StringBlob.java
+++ b/core/src/main/java/org/apache/oozie/StringBlob.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 
 import org.apache.oozie.compression.CodecFactory;
 import org.apache.oozie.compression.CompressionCodec;
+import org.apache.oozie.util.ByteArrayUtils;
 import org.apache.oozie.util.StringUtils;
 
 /**
@@ -40,7 +41,7 @@ public class StringBlob {
      * @param byteArray the byte array
      */
     public StringBlob(byte[] byteArray) {
-        this.rawBlob = byteArray;
+        this.rawBlob = ByteArrayUtils.weakIntern(byteArray);
     }
 
     /**
@@ -109,14 +110,14 @@ public class StringBlob {
         if (CodecFactory.isCompressionEnabled()) {
             byte[] bytes = CodecFactory.getHeaderBytes();
             try {
-                rawBlob = CodecFactory.getCompressionCodec().compressString(bytes, string);
+                rawBlob = ByteArrayUtils.weakIntern(CodecFactory.getCompressionCodec().compressString(bytes, string));
             }
             catch (IOException ex) {
                 throw new RuntimeException(ex);
             }
         }
         else {
-            rawBlob = string.getBytes();
+            rawBlob = ByteArrayUtils.weakIntern(string.getBytes());
         }
         return rawBlob;
     }

http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java b/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java
new file mode 100644
index 0000000..1ab59ce
--- /dev/null
+++ b/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oozie.util;
+
+import com.google.common.collect.Interner;
+import com.google.common.collect.Interners;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Utility methods for working with {@code byte[]} primitive values.
+ * <p>
+ * Interning {@code byte[]} instances doesn't seem to take too many resources both in terms of CPU and memory: 10k * 10k random
+ * {@code byte[]} allocation alone takes around 7.8 seconds, allocation plus interning takes around 8.0 seconds.
+ */
+public class ByteArrayUtils {
+    private static final Interner<ByteBuffer> BYTE_BUFFER_INTERNER = Interners.newWeakInterner();
+
+    /**
+     * Return the interned {@code byte[]} of the same content, or {@code null} if the given {@code byte[]} is {@code null}.
+     * The array is wrapped into a {@link ByteBuffer} that the weak {@link Interner} compares by content and retains weakly.
+     * @param values The {@code byte[]} to intern; it may become the shared instance, so it should not be modified afterwards
+     * @return The backing {@code byte[]} of the interned {@link ByteBuffer}, shared by equal-content arrays while it is cached
+     */
+    public static byte[] weakIntern(final byte[] values) {
+        if (values == null) {
+            return values;
+        }
+
+        return BYTE_BUFFER_INTERNER.intern(ByteBuffer.wrap(values)).array(); // wrap() does not copy: the array is shared
+    }
+}

http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java b/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java
new file mode 100644
index 0000000..cdf7cd0
--- /dev/null
+++ b/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.oozie.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestByteArrayUtils {
+
+    @Test
+    public void testByteArrayInterningGivesSameInstances() {
+        final int byteArrayCount = 1000;
+        final int elementCount = 100;
+        final byte[][] sameContent = new byte[byteArrayCount][];
+
+        for (int i = 0; i < byteArrayCount; i++) { // build separately allocated arrays, all holding 0..elementCount-1
+            final byte[] source = new byte[elementCount];
+            sameContent[i] = source;
+            for (int j = 0; j < elementCount; j++) {
+                source[j] = (byte) j;
+            }
+        }
+
+        for (int i = 1; i < byteArrayCount; i++) { // precondition: neighbours are distinct instances before interning
+            Assert.assertTrue("copied byte[]s should be another instances", sameContent[i - 1] != sameContent[i]);
+        }
+
+        final byte[][] interned = new byte[byteArrayCount][];
+        for (int i = 0; i < byteArrayCount; i++) { // intern every array
+            interned[i] = ByteArrayUtils.weakIntern(sameContent[i]);
+        }
+
+        for (int i = 1; i < byteArrayCount; i++) { // neighbours equal by reference, hence all entries are one instance
+            Assert.assertTrue("weak interned byte[]s should be the same instance", interned[i - 1] == interned[i]);
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index fd7bd76..267af2a 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 5.1.0 release (trunk - unreleased)
 
+OOZIE-3250 Reduce heap waste by reducing duplicate byte[] count (andras.piros)
 OOZIE-3240 Flaky test TestJMSAccessorService#testConnectionRetry (pbacsko via gezapeti)
 OOZIE-3246 Flaky test TestJMSJobEventListener#testConnectionDrop (pbacsko via gezapeti)
 OOZIE-3236 Fix flaky test TestHiveActionExecutor#testHiveAction (pbacsko via gezapeti)