You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2016/09/20 20:08:06 UTC

hbase git commit: HBASE-16658 Optimize UTF8 string/byte conversions (binlijin)

Repository: hbase
Updated Branches:
  refs/heads/master 66821206b -> 6624c676f


HBASE-16658 Optimize UTF8 string/byte conversions (binlijin)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6624c676
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6624c676
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6624c676

Branch: refs/heads/master
Commit: 6624c676fe7043a95b47f84db4b21df0728ce359
Parents: 6682120
Author: tedyu <yu...@gmail.com>
Authored: Tue Sep 20 13:08:04 2016 -0700
Committer: tedyu <yu...@gmail.com>
Committed: Tue Sep 20 13:08:04 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/Bytes.java     | 38 +++++++++++++++-----
 1 file changed, 29 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/6624c676/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 41292a5..2d7d3f6 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -24,10 +24,12 @@ import static com.google.common.base.Preconditions.checkPositionIndex;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.security.SecureRandom;
 import java.util.Arrays;
 import java.util.Collection;
@@ -35,15 +37,13 @@ import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
 
-import com.google.protobuf.ByteString;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.WritableUtils;
@@ -52,6 +52,7 @@ import sun.misc.Unsafe;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
 
 /**
  * Utility class that handles byte arrays, conversions to/from other types,
@@ -73,6 +74,10 @@ public class Bytes implements Comparable<Bytes> {
   /** When we encode strings, we always specify UTF8 encoding */
   private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING);
 
+  // Passing the charset's canonical name (rather than a Charset instance) to String/byte[]
+  // conversions is much more efficient because cached encoders/decoders are reused.
+  private static final String UTF8_CSN = StandardCharsets.UTF_8.name();
+
   //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed
   private static final byte [] EMPTY_BYTE_ARRAY = new byte [0];
 
@@ -563,7 +568,7 @@ public class Bytes implements Comparable<Bytes> {
    * @param off offset into array
    * @return String made from <code>b</code> or null
    */
-  public static String toString(final byte [] b, int off) {
+  public static String toString(final byte[] b, int off) {
     if (b == null) {
       return null;
     }
@@ -571,7 +576,12 @@ public class Bytes implements Comparable<Bytes> {
     if (len <= 0) {
       return "";
     }
-    return new String(b, off, len, UTF8_CHARSET);
+    try {
+      return new String(b, off, len, UTF8_CSN);
+    } catch (UnsupportedEncodingException e) {
+      // should never happen!
+      throw new IllegalArgumentException("UTF8 encoding is not supported", e);
+    }
   }
 
   /**
@@ -583,14 +593,19 @@ public class Bytes implements Comparable<Bytes> {
    * @param len length of utf-8 sequence
    * @return String made from <code>b</code> or null
    */
-  public static String toString(final byte [] b, int off, int len) {
+  public static String toString(final byte[] b, int off, int len) {
     if (b == null) {
       return null;
     }
     if (len == 0) {
       return "";
     }
-    return new String(b, off, len, UTF8_CHARSET);
+    try {
+      return new String(b, off, len, UTF8_CSN);
+    } catch (UnsupportedEncodingException e) {
+      // should never happen!
+      throw new IllegalArgumentException("UTF8 encoding is not supported", e);
+    }
   }
 
   /**
@@ -715,7 +730,12 @@ public class Bytes implements Comparable<Bytes> {
    * @return the byte array
    */
   public static byte[] toBytes(String s) {
-    return s.getBytes(UTF8_CHARSET);
+    try {
+      return s.getBytes(UTF8_CSN);
+    } catch (UnsupportedEncodingException e) {
+      // should never happen!
+      throw new IllegalArgumentException("UTF8 decoding is not supported", e);
+    }
   }
 
   /**