You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2016/07/12 19:22:34 UTC
svn commit: r1752335 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
main/java/org/apache/pdfbox/util/Hex.java
test/java/org/apache/pdfbox/util/TestHexUtil.java
Author: tilman
Date: Tue Jul 12 19:22:34 2016
New Revision: 1752335
URL: http://svn.apache.org/viewvc?rev=1752335&view=rev
Log:
PDFBOX-3418: optimize string to hex conversion, as suggested by Michael Doswald
Added:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java (with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java?rev=1752335&r1=1752334&r2=1752335&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java Tue Jul 12 19:22:34 2016
@@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
/**
* Writes ToUnicode Mapping Files.
@@ -154,15 +155,15 @@ final class ToUnicodeWriter
{
int index = batch * 100 + j;
writer.write('<');
- writer.write(toHex(srcFrom.get(index)));
+ writer.write(Hex.getChars(srcFrom.get(index).shortValue()));
writer.write("> ");
writer.write('<');
- writer.write(toHex(srcTo.get(index)));
+ writer.write(Hex.getChars(srcTo.get(index).shortValue()));
writer.write("> ");
- writer.write("<");
- writer.write(stringToHex(dstString.get(index)));
+ writer.write('<');
+ writer.write(Hex.getCharsUTF16BE(dstString.get(index)));
writer.write(">\n");
}
writeLine(writer, "endbfrange\n");
@@ -182,20 +183,4 @@ final class ToUnicodeWriter
writer.write(text);
writer.write('\n');
}
-
- private String toHex(int num)
- {
- return String.format("%04X", num);
- }
-
- private String stringToHex(String text)
- {
- // use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2
- StringBuilder sb = new StringBuilder();
- for (byte b : text.getBytes(Charsets.UTF_16BE))
- {
- sb.append(String.format("%02X", b));
- }
- return sb.toString();
- }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java?rev=1752335&r1=1752334&r2=1752335&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java Tue Jul 12 19:22:34 2016
@@ -33,9 +33,8 @@ public final class Hex
* https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal
*
*/
- private static final String HEXES_STRING = "0123456789ABCDEF";
-
- private static final byte[] HEXES = HEXES_STRING.getBytes(Charsets.US_ASCII);
+ private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
+ private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
private Hex() {}
@@ -44,7 +43,7 @@ public final class Hex
*/
public static String getString(byte b)
{
- char[] chars = new char[]{HEXES_STRING.charAt(getHighNibble(b)), HEXES_STRING.charAt(getLowNibble(b))};
+ char[] chars = new char[]{HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]};
return new String(chars);
}
@@ -56,7 +55,7 @@ public final class Hex
StringBuilder string = new StringBuilder(bytes.length * 2);
for (byte b : bytes)
{
- string.append(HEXES_STRING.charAt(getHighNibble(b))).append(HEXES_STRING.charAt(getLowNibble(b)));
+ string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]);
}
return string.toString();
}
@@ -66,7 +65,7 @@ public final class Hex
*/
public static byte[] getBytes(byte b)
{
- return new byte[]{HEXES[getHighNibble(b)], HEXES[getLowNibble(b)]};
+ return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]};
}
/**
@@ -77,13 +76,57 @@ public final class Hex
byte[] asciiBytes = new byte[bytes.length*2];
for(int i=0; i< bytes.length; i++)
{
- asciiBytes[i*2] = HEXES[getHighNibble(bytes[i])];
- asciiBytes[i*2+1] = HEXES[getLowNibble(bytes[i])];
+ asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])];
+ asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])];
}
return asciiBytes;
}
/**
+ * Returns the characters corresponding to the ASCII hex encoding of the given short.
+ */
+ public static char[] getChars(short num)
+ {
+ char[] hex = new char[4];
+ hex[0] = HEX_CHARS[(num >> 12) & 0x0F];
+ hex[1] = HEX_CHARS[(num >> 8) & 0x0F];
+ hex[2] = HEX_CHARS[(num >> 4) & 0x0F];
+ hex[3] = HEX_CHARS[num & 0x0F];
+ return hex;
+ }
+
+ /**
+ * Takes the characters in the given string, converts them to bytes in UTF-16BE format
+ * and builds a char array that corresponds to the ASCII hex encoding of the resulting
+ * bytes.
+ *
+ * Example:
+ * <pre>
+ * getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
+ * </pre>
+ *
+ * @param text The string to convert
+ * @return The string converted to hex
+ */
+ public static char[] getCharsUTF16BE(String text)
+ {
+ // Note that the internal representation of a String in Java is already UTF-16. Therefore
+ // we do not need to use an encoder to convert the string to its byte representation.
+ char[] hex = new char[text.length()*4];
+
+ for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++)
+ {
+ char c = text.charAt(stringIdx);
+ hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F];
+ hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F];
+ hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F];
+ hex[charIdx++] = HEX_CHARS[c & 0x0F];
+ }
+
+ return hex;
+ }
+
+ /**
* Writes the given byte as hex value to the given output stream.
* @param b the byte to be written
* @param output the output stream to be written to
@@ -91,8 +134,8 @@ public final class Hex
*/
public static void writeHexByte(byte b, OutputStream output) throws IOException
{
- output.write(HEXES[getHighNibble(b)]);
- output.write(HEXES[getLowNibble(b)]);
+ output.write(HEX_BYTES[getHighNibble(b)]);
+ output.write(HEX_BYTES[getLowNibble(b)]);
}
/**
Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java?rev=1752335&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java Tue Jul 12 19:22:34 2016
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ *
+ * @author Michael Doswald
+ */
+public class TestHexUtil extends TestCase
+{
+
+ /**
+ * Test conversion from short to char[]
+ */
+ public void testGetCharsFromShortWithoutPassingInABuffer()
+ {
+ assertArrayEquals(new char[]{'0','0','0','0'}, Hex.getChars((short)0x0000));
+ assertArrayEquals(new char[]{'0','0','0','F'}, Hex.getChars((short)0x000F));
+ assertArrayEquals(new char[]{'A','B','C','D'}, Hex.getChars((short)0xABCD));
+ assertArrayEquals(new char[]{'B','A','B','E'}, Hex.getChars((short)0xCAFEBABE));
+ }
+
+ /**
+ * Check conversion from String to a char[] which contains the UTF16-BE encoded
+ * bytes of the string as hex digits
+ *
+ */
+ public void testGetCharsUTF16BE()
+ {
+ assertArrayEquals(new char[]{'0','0','6','1','0','0','6','2'}, Hex.getCharsUTF16BE("ab"));
+ assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助"));
+ }
+
+ private void assertArrayEquals(char[] expected, char[] actual)
+ {
+ assertEquals("Length of char array not equal", expected.length, actual.length);
+ for (int idx = 0; idx < expected.length; idx++)
+ {
+ if (expected[idx] != actual[idx])
+ {
+ fail(String.format("Character at index %d not equal. Expected '%c' but got '%c'",
+ idx, expected[idx], actual[idx]));
+ }
+ }
+ }
+
+ /**
+ * Set the tests in the suite for this test class.
+ *
+ * @return the Suite.
+ */
+ public static Test suite()
+ {
+ return new TestSuite(TestHexUtil.class);
+ }
+
+ /**
+ * Command line execution.
+ *
+ * @param args Command line arguments.
+ */
+ public static void main(String[] args)
+ {
+ String[] arg =
+ {
+ TestHexUtil.class.getName()
+ };
+ junit.textui.TestRunner.main(arg);
+ }
+}
Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
------------------------------------------------------------------------------
svn:eol-style = native