You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2016/07/12 19:22:34 UTC

svn commit: r1752335 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java main/java/org/apache/pdfbox/util/Hex.java test/java/org/apache/pdfbox/util/TestHexUtil.java

Author: tilman
Date: Tue Jul 12 19:22:34 2016
New Revision: 1752335

URL: http://svn.apache.org/viewvc?rev=1752335&view=rev
Log:
PDFBOX-3418: optimize string to hex conversion, as suggested by Michael Doswald

Added:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java?rev=1752335&r1=1752334&r2=1752335&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java Tue Jul 12 19:22:34 2016
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
 
 /**
  * Writes ToUnicode Mapping Files.
@@ -154,15 +155,15 @@ final class ToUnicodeWriter
             {
                 int index = batch * 100 + j;
                 writer.write('<');
-                writer.write(toHex(srcFrom.get(index)));
+                writer.write(Hex.getChars(srcFrom.get(index).shortValue()));
                 writer.write("> ");
 
                 writer.write('<');
-                writer.write(toHex(srcTo.get(index)));
+                writer.write(Hex.getChars(srcTo.get(index).shortValue()));
                 writer.write("> ");
 
-                writer.write("<");
-                writer.write(stringToHex(dstString.get(index)));
+                writer.write('<');
+                writer.write(Hex.getCharsUTF16BE(dstString.get(index)));
                 writer.write(">\n");
             }
             writeLine(writer, "endbfrange\n");
@@ -182,20 +183,4 @@ final class ToUnicodeWriter
         writer.write(text);
         writer.write('\n');
     }
-
-    private String toHex(int num)
-    {
-        return String.format("%04X", num);
-    }
-
-    private String stringToHex(String text)
-    {
-        // use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2
-        StringBuilder sb = new StringBuilder();
-        for (byte b : text.getBytes(Charsets.UTF_16BE))
-        {
-            sb.append(String.format("%02X", b));
-        }
-        return sb.toString();
-    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java?rev=1752335&r1=1752334&r2=1752335&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java Tue Jul 12 19:22:34 2016
@@ -33,9 +33,8 @@ public final class Hex
      * https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal
      *
      */
-    private static final String HEXES_STRING = "0123456789ABCDEF";
-
-    private static final byte[] HEXES = HEXES_STRING.getBytes(Charsets.US_ASCII);
+    private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
+    private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
 
     private Hex() {}
 
@@ -44,7 +43,7 @@ public final class Hex
      */
     public static String getString(byte b)
     {
-        char[] chars = new char[]{HEXES_STRING.charAt(getHighNibble(b)), HEXES_STRING.charAt(getLowNibble(b))};
+        char[] chars = new char[]{HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]};
         return new String(chars);
     }
 
@@ -56,7 +55,7 @@ public final class Hex
         StringBuilder string = new StringBuilder(bytes.length * 2);
         for (byte b : bytes)
         {
-            string.append(HEXES_STRING.charAt(getHighNibble(b))).append(HEXES_STRING.charAt(getLowNibble(b)));
+            string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]);
         }
         return string.toString();
     }
@@ -66,7 +65,7 @@ public final class Hex
      */
     public static byte[] getBytes(byte b)
     {
-        return new byte[]{HEXES[getHighNibble(b)], HEXES[getLowNibble(b)]};
+        return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]};
     }
     
     /**
@@ -77,13 +76,57 @@ public final class Hex
         byte[] asciiBytes = new byte[bytes.length*2];
         for(int i=0; i< bytes.length; i++)
         {
-            asciiBytes[i*2] = HEXES[getHighNibble(bytes[i])];
-            asciiBytes[i*2+1] = HEXES[getLowNibble(bytes[i])];
+            asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])];
+            asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])];
         }
         return asciiBytes;
     }
 
     /** 
+     * Returns the characters corresponding to the ASCII hex encoding of the given short.
+     */
+    public static char[] getChars(short num)
+    {
+        char[] hex = new char[4];
+        hex[0] = HEX_CHARS[(num >> 12) & 0x0F];
+        hex[1] = HEX_CHARS[(num >> 8) & 0x0F];
+        hex[2] = HEX_CHARS[(num >> 4) & 0x0F];
+        hex[3] = HEX_CHARS[num & 0x0F];
+        return hex;
+    }
+
+    /**
+     * Takes the characters in the given string, converts them to bytes in UTF-16BE format
+     * and builds a char array that corresponds to the ASCII hex encoding of the resulting
+     * bytes.
+     *
+     * Example:
+     * <pre>
+     *   getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
+     * </pre>
+     *
+     * @param text The string to convert
+     * @return The string converted to hex
+     */
+    public static char[] getCharsUTF16BE(String text)
+    {
+        // Note that a Java String is already a sequence of UTF-16 code units. Therefore
+        // we do not need to use an encoder to convert the string to its byte representation.
+        char[] hex = new char[text.length()*4];
+
+        for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++)
+        {
+            char c = text.charAt(stringIdx);
+            hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[c & 0x0F];
+        }
+
+        return hex;
+    }
+
+    /**
      * Writes the given byte as hex value to the given output stream.
      * @param b the byte to be written
      * @param output the output stream to be written to
@@ -91,8 +134,8 @@ public final class Hex
      */
     public static void writeHexByte(byte b, OutputStream output) throws IOException
     {
-        output.write(HEXES[getHighNibble(b)]);
-        output.write(HEXES[getLowNibble(b)]);
+        output.write(HEX_BYTES[getHighNibble(b)]);
+        output.write(HEX_BYTES[getLowNibble(b)]);
     }
 
     /** 

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java?rev=1752335&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java Tue Jul 12 19:22:34 2016
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ *
+ * @author Michael Doswald
+ */
+public class TestHexUtil extends TestCase
+{
+    
+    /**
+     * Test conversion from short to char[]
+     */
+    public void testGetCharsFromShortWithoutPassingInABuffer()
+    {
+        assertArrayEquals(new char[]{'0','0','0','0'}, Hex.getChars((short)0x0000));
+        assertArrayEquals(new char[]{'0','0','0','F'}, Hex.getChars((short)0x000F));
+        assertArrayEquals(new char[]{'A','B','C','D'}, Hex.getChars((short)0xABCD));
+        assertArrayEquals(new char[]{'B','A','B','E'}, Hex.getChars((short)0xCAFEBABE));
+    }
+
+    /**
+     * Check conversion from String to a char[] which contains the UTF-16BE encoded
+     * bytes of the string as hex digits
+     *
+     */
+    public void testGetCharsUTF16BE()
+    {
+        assertArrayEquals(new char[]{'0','0','6','1','0','0','6','2'}, Hex.getCharsUTF16BE("ab"));
+        assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助"));
+    }
+
+    private void assertArrayEquals(char[] expected, char[] actual)
+    {
+        assertEquals("Length of char array not equal", expected.length, actual.length);
+        for (int idx = 0; idx < expected.length; idx++)
+        {
+            if (expected[idx] != actual[idx])
+            {
+                fail(String.format("Character at index %d not equal. Expected '%c' but got '%c'", 
+                        idx, expected[idx], actual[idx]));
+            }
+        }
+    }
+
+    /**
+     * Set the tests in the suite for this test class.
+     *
+     * @return the Suite.
+     */
+    public static Test suite()
+    {
+        return new TestSuite(TestHexUtil.class);
+    }
+
+    /**
+     * Command line execution.
+     *
+     * @param args Command line arguments.
+     */
+    public static void main(String[] args)
+    {
+        String[] arg =
+        {
+            TestHexUtil.class.getName()
+        };
+        junit.textui.TestRunner.main(arg);
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native