You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2016/07/18 20:06:18 UTC

svn commit: r1753313 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java main/java/org/apache/pdfbox/util/NumberFormatUtil.java test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java

Author: tilman
Date: Mon Jul 18 20:06:18 2016
New Revision: 1753313

URL: http://svn.apache.org/viewvc?rev=1753313&view=rev
Log:
PDFBOX-3421: Optimize float to string conversion, as done by Michael Doswald

Added:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java   (with props)
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java   (with props)
Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java?rev=1753313&r1=1753312&r2=1753313&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDPageContentStream.java Mon Jul 18 20:06:18 2016
@@ -53,6 +53,7 @@ import org.apache.pdfbox.pdmodel.graphic
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
 import org.apache.pdfbox.util.Charsets;
 import org.apache.pdfbox.util.Matrix;
+import org.apache.pdfbox.util.NumberFormatUtil;
 
 /**
  * Provides the ability to write to a page content stream.
@@ -105,6 +106,7 @@ public final class PDPageContentStream i
 
     // number format
     private final NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US);
+    private final byte[] formatBuffer = new byte[32];
 
     /**
      * Create a new PDPage content stream.
@@ -266,7 +268,7 @@ public final class PDPageContentStream i
         }
 
         // configure NumberFormat
-        formatDecimal.setMaximumFractionDigits(10);
+        formatDecimal.setMaximumFractionDigits(5);
         formatDecimal.setGroupingUsed(false);
     }
 
@@ -2217,9 +2219,19 @@ public final class PDPageContentStream i
     /**
      * Writes a real number to the content stream.
      */
-    private void writeOperand(float real) throws IOException
+    protected void writeOperand(float real) throws IOException
     {
-        write(formatDecimal.format(real));
+        int byteCount = NumberFormatUtil.formatFloatFast(real, formatDecimal.getMaximumFractionDigits(), formatBuffer);
+
+        if (byteCount == -1)
+        {
+            //Fast formatting not possible for this value (-1 returned): fall back to NumberFormat
+            write(formatDecimal.format(real));
+        }
+        else
+        {
+            output.write(formatBuffer, 0, byteCount);
+        }
         output.write(' ');
     }
 

Added: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java?rev=1753313&view=auto
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java (added)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java Mon Jul 18 20:06:18 2016
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+/**
+ * This class contains methods to format numbers as ASCII bytes.
+ *
+ * @author Michael Doswald
+ */
+public class NumberFormatUtil
+{
+
+    /**
+     * Maximum number of fraction digits supported by the format methods
+     */
+    private static final int MAX_FRACTION_DIGITS = 5;
+
+    /**
+     * Powers of ten for fast lookup in the format methods: 10^0..10^18 as long, 10^0..10^9 as int
+     */
+    private static final long[] POWER_OF_TENS;
+    private static final int[] POWER_OF_TENS_INT;
+
+    static
+    {
+        POWER_OF_TENS = new long[19];
+        POWER_OF_TENS[0] = 1;
+
+        for (int exp = 1; exp < POWER_OF_TENS.length; exp++)
+        {
+            POWER_OF_TENS[exp] = POWER_OF_TENS[exp - 1] * 10;
+        }
+
+        POWER_OF_TENS_INT = new int[10];
+        POWER_OF_TENS_INT[0] = 1;
+
+        for (int exp = 1; exp < POWER_OF_TENS_INT.length; exp++)
+        {
+            POWER_OF_TENS_INT[exp] = POWER_OF_TENS_INT[exp - 1] * 10;
+        }
+    }
+
+    /**
+     * Fast variant to format a floating point value to an ASCII string. The format will fail if the
+     * value is greater than {@link Long#MAX_VALUE}, smaller or equal to {@link Long#MIN_VALUE}, is
+     * {@link Float#NaN}, infinite or the number of requested fraction digits is greater than
+     * {@link #MAX_FRACTION_DIGITS}.
+     *
+     * When the number contains more fractional digits than {@code maxFractionDigits} the value will
+     * be rounded. Rounding is done to the nearest possible value, with the tie breaking rule of
+     * rounding away from zero. Trailing zeros of the fraction (and a zero fraction) are omitted.
+     *
+     * @param value The float value to format
+     * @param maxFractionDigits The maximum number of fraction digits used
+     * @param asciiBuffer The output buffer to write the formatted value to
+     *
+     * @return The number of bytes used in the buffer or {@code -1} if formatting failed
+     */
+    public static int formatFloatFast(float value, int maxFractionDigits, byte[] asciiBuffer)
+    {
+        if (Float.isNaN(value) ||
+                Float.isInfinite(value) ||
+                value > Long.MAX_VALUE ||
+                value <= Long.MIN_VALUE ||
+                maxFractionDigits > MAX_FRACTION_DIGITS)
+        {
+            return -1;
+        }
+
+        int offset = 0;
+        long integerPart = (long) value;
+
+        //handle sign
+        if (value < 0)
+        {
+            asciiBuffer[offset++] = '-';
+            integerPart = -integerPart;
+        }
+
+        //extract fraction part scaled to an integer; adding 0.5 rounds half away from zero
+        long fractionPart = (long) ((Math.abs((double)value) - integerPart) * POWER_OF_TENS[maxFractionDigits] + 0.5d);
+
+        //Check for rounding to next integer
+        if (fractionPart >= POWER_OF_TENS[maxFractionDigits]) {
+            integerPart++;
+            fractionPart -= POWER_OF_TENS[maxFractionDigits];
+        }
+
+        //format integer part
+        offset = formatPositiveNumber(integerPart, getExponent(integerPart), false, asciiBuffer, offset);
+
+        if (fractionPart > 0 && maxFractionDigits > 0)
+        {
+            asciiBuffer[offset++] = '.';
+            offset = formatPositiveNumber(fractionPart, maxFractionDigits - 1, true, asciiBuffer, offset);
+        }
+
+        return offset;
+    }
+
+    /**
+     * Formats a positive integer number starting with the digit at {@code 10^exp}.
+     *
+     * @param number The number to format
+     * @param exp The start digit
+     * @param omitTrailingZeros Whether the formatting should stop if only trailing zeros are left.
+     * This is needed e.g. when formatting fractions of a number.
+     * @param asciiBuffer The buffer to write the ASCII digits to
+     * @param startOffset The start offset into the buffer to start writing
+     *
+     * @return The offset into the buffer which contains the first byte that was not filled by the
+     * method
+     */
+    private static int formatPositiveNumber(long number, int exp, boolean omitTrailingZeros, byte[] asciiBuffer, int startOffset)
+    {
+        int offset = startOffset;
+        long remaining = number;
+
+        //Use long arithmetic while the value exceeds the int range or exp lies beyond the int power
+        //table; checking only the value let e.g. 100000006144 reach POWER_OF_TENS_INT[10] below.
+        while ((remaining > Integer.MAX_VALUE || exp >= POWER_OF_TENS_INT.length)
+                && (!omitTrailingZeros || remaining > 0))
+        {
+            long digit = remaining / POWER_OF_TENS[exp];
+            remaining -= (digit * POWER_OF_TENS[exp]);
+            asciiBuffer[offset++] = (byte) ('0' + digit);
+            exp--;
+        }
+
+        //If the remaining fits into an integer, use int arithmetic as it is faster
+        int remainingInt = (int) remaining;
+        while (exp >= 0 && (!omitTrailingZeros || remainingInt > 0))
+        {
+            int digit = remainingInt / POWER_OF_TENS_INT[exp];
+            remainingInt -= (digit * POWER_OF_TENS_INT[exp]);
+            asciiBuffer[offset++] = (byte) ('0' + digit);
+            exp--;
+        }
+
+        return offset;
+    }
+
+    /**
+     * Returns the highest exponent of 10 where {@code 10^exp <= number} for numbers > 0, else 0
+     */
+    private static int getExponent(long number)
+    {
+        for (int exp = 0; exp < (POWER_OF_TENS.length - 1); exp++)
+        {
+            if (number < POWER_OF_TENS[exp + 1])
+            {
+                return exp;
+            }
+        }
+
+        return POWER_OF_TENS.length - 1;
+    }
+}

Propchange: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/NumberFormatUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java?rev=1753313&view=auto
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java (added)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java Mon Jul 18 20:06:18 2016
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.util;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit tests for {@link NumberFormatUtil#formatFloatFast(float, int, byte[])}.
+ * @author Michael Doswald
+ */
+public class TestNumberFormatUtil extends TestCase
+{
+
+    private final byte[] buffer = new byte[64];
+
+    public void testFormatOfIntegerValues()
+    {
+        assertEquals(2, NumberFormatUtil.formatFloatFast(51, 5, buffer));
+        assertArrayEquals(new byte[]{'5', '1'}, Arrays.copyOfRange(buffer, 0, 2));
+
+        assertEquals(3, NumberFormatUtil.formatFloatFast(-51, 5, buffer));
+        assertArrayEquals(new byte[]{'-', '5', '1'}, Arrays.copyOfRange(buffer, 0, 3));
+
+        assertEquals(1, NumberFormatUtil.formatFloatFast(0, 5, buffer));
+        assertArrayEquals(new byte[]{'0'}, Arrays.copyOfRange(buffer, 0, 1));
+
+        assertEquals(19, NumberFormatUtil.formatFloatFast(Long.MAX_VALUE, 5, buffer));
+        assertArrayEquals(new byte[]{'9', '2', '2', '3', '3', '7', '2', '0', '3', '6', '8', '5', 
+                                     '4', '7', '7', '5', '8', '0', '7'}, 
+                          Arrays.copyOfRange(buffer, 0, 19));
+
+        //Note: Integer.MAX_VALUE would be 2147483647, but when converting to float, we have 
+        //      precision errors. NumberFormat.getIntegerInstance() does also print 2147483648 for 
+        //      such a float
+        assertEquals(10, NumberFormatUtil.formatFloatFast(Integer.MAX_VALUE, 5, buffer));
+        assertArrayEquals(new byte[]{'2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 
+                          Arrays.copyOfRange(buffer, 0, 10));
+
+        assertEquals(11, NumberFormatUtil.formatFloatFast(Integer.MIN_VALUE, 5, buffer));
+        assertArrayEquals(new byte[]{'-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8'}, 
+                          Arrays.copyOfRange(buffer, 0, 11));
+    }
+
+    public void testFormatOfRealValues()
+    {
+        assertEquals(3, NumberFormatUtil.formatFloatFast(0.7f, 5, buffer));
+        assertArrayEquals(new byte[]{'0', '.', '7'}, Arrays.copyOfRange(buffer, 0, 3));
+
+        assertEquals(4, NumberFormatUtil.formatFloatFast(-0.7f, 5, buffer));
+        assertArrayEquals(new byte[]{'-', '0', '.', '7'}, Arrays.copyOfRange(buffer, 0, 4));
+
+        assertEquals(5, NumberFormatUtil.formatFloatFast(0.003f, 5, buffer));
+        assertArrayEquals(new byte[]{'0', '.', '0', '0', '3'}, Arrays.copyOfRange(buffer, 0, 5));
+
+        assertEquals(6, NumberFormatUtil.formatFloatFast(-0.003f, 5, buffer));
+        assertArrayEquals(new byte[]{'-', '0', '.', '0', '0', '3'}, 
+                          Arrays.copyOfRange(buffer, 0, 6));
+    }
+
+    public void testFormatOfRealValuesReturnsMinusOneIfItCannotBeFormatted()
+    {
+        assertEquals("NaN should not be formattable", -1, 
+                     NumberFormatUtil.formatFloatFast(Float.NaN, 5, buffer));
+        assertEquals("+Infinity should not be formattable", -1, 
+                     NumberFormatUtil.formatFloatFast(Float.POSITIVE_INFINITY, 5, buffer));
+        assertEquals("-Infinity should not be formattable", -1, 
+                     NumberFormatUtil.formatFloatFast(Float.NEGATIVE_INFINITY, 5, buffer));
+
+        assertEquals("Too big number should not be formattable", -1, 
+                     NumberFormatUtil.formatFloatFast(((float) Long.MAX_VALUE) + 1000000000000f, 5, buffer));
+        assertEquals("Too big negative number should not be formattable", -1, 
+                     NumberFormatUtil.formatFloatFast(Long.MIN_VALUE, 5, buffer));
+    }
+
+    public void testRoundingUp()
+    {
+        assertEquals(1, NumberFormatUtil.formatFloatFast(0.999999f, 5, buffer));
+        assertArrayEquals(new byte[]{'1'}, Arrays.copyOfRange(buffer, 0, 1));
+        
+        assertEquals(4, NumberFormatUtil.formatFloatFast(0.125f, 2, buffer));
+        assertArrayEquals(new byte[]{'0','.','1','3'}, Arrays.copyOfRange(buffer, 0, 4));
+        
+        assertEquals(2, NumberFormatUtil.formatFloatFast(-0.999999f, 5, buffer));
+        assertArrayEquals(new byte[]{'-','1'}, Arrays.copyOfRange(buffer, 0, 2));
+    }
+    
+    public void testRoundingDown()
+    {
+        assertEquals(4, NumberFormatUtil.formatFloatFast(0.994f, 2, buffer));
+        assertArrayEquals(new byte[]{'0','.','9','9'}, Arrays.copyOfRange(buffer, 0, 4));
+    }
+
+    /**
+     * Formats all floats in a defined range, parses them back with the BigDecimal constructor and
+     * compares them to the expected result. Only the range -10 to 10 is tested, for performance
+     * reasons. The formatting is expected to work for ranges up to at least A0 size:
+     * 
+     * <ul>
+     *   <li>PDF uses 72 dpi resolution</li>
+     *   <li>A0 size is 841mm x 1189mm, this equals to about 2384 x 3370 in dot resolution</li>
+     * </ul>
+     */
+    public void testFormattingInRange()
+    {
+        //Define a range to test
+        BigDecimal minVal = new BigDecimal("-10");
+        BigDecimal maxVal = new BigDecimal("10");
+        BigDecimal maxDelta = BigDecimal.ZERO;
+        
+        Pattern pattern = Pattern.compile("^\\-?\\d+(\\.\\d+)?$");
+        
+        byte[] formatBuffer = new byte[32];
+        
+        for (int maxFractionDigits = 0; maxFractionDigits <= 5; maxFractionDigits++)
+        {
+            BigDecimal increment =  new BigDecimal(10).pow(-maxFractionDigits, MathContext.DECIMAL128);
+            
+            for (BigDecimal value = minVal; value.compareTo(maxVal) < 0; value = value.add(increment))
+            {
+                //format with the formatFloatFast method and parse back
+                int byteCount = NumberFormatUtil.formatFloatFast(value.floatValue(), maxFractionDigits, formatBuffer);
+                assertFalse(byteCount == -1);
+                String newStringResult = new String(formatBuffer, 0, byteCount, Charsets.US_ASCII);
+                BigDecimal formattedDecimal = new BigDecimal(newStringResult);
+                
+                //create new BigDecimal with float representation. This is needed because the float
+                //may not represent the 'value' BigDecimal precisely, in which case the formatFloatFast
+                //would get a different result.
+                BigDecimal expectedDecimal = new BigDecimal(value.floatValue());
+                expectedDecimal = expectedDecimal.setScale(maxFractionDigits, RoundingMode.HALF_UP);
+                
+                BigDecimal diff = formattedDecimal.subtract(expectedDecimal).abs();
+                
+                assertTrue(pattern.matcher(newStringResult).matches());
+                
+                //Fail if diff is greater than maxDelta.
+                if (diff.compareTo(maxDelta) > 0)
+                {
+                    fail("Expected: " + expectedDecimal + ", actual: " + newStringResult + ", diff: " + diff);
+                }
+            }
+        }
+    }
+    
+    private void assertArrayEquals(byte[] expected, byte[] actual)
+    {
+        assertEquals("Length of byte array not equal", expected.length, actual.length);
+        for (int idx = 0; idx < expected.length; idx++)
+        {
+            if (expected[idx] != actual[idx])
+            {
+                fail(String.format("Byte at index %d not equal. Expected '%02X' but got '%02X'",
+                        idx, expected[idx], actual[idx]));
+            }
+        }
+    }
+
+    /**
+     * Set the tests in the suite for this test class.
+     *
+     * @return the Suite.
+     */
+    public static Test suite()
+    {
+        return new TestSuite(TestNumberFormatUtil.class);
+    }
+
+    /**
+     * Command line execution.
+     *
+     * @param args Command line arguments.
+     */
+    public static void main(String[] args)
+    {
+        String[] arg =
+        {
+            TestNumberFormatUtil.class.getName()
+        };
+        junit.textui.TestRunner.main(arg);
+    }
+}

Propchange: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestNumberFormatUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native