You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2019/05/19 16:46:17 UTC

svn commit: r1859512 - in /pdfbox/branches/issue45: ./ fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java fontbox/src/test/resources/cmap/CMapTest

Author: lehmi
Date: Sun May 19 16:46:17 2019
New Revision: 1859512

URL: http://svn.apache.org/viewvc?rev=1859512&view=rev
Log:
PDFBOX-4550: refactored parsing of bfranges, eliminated buggy compare method

Modified:
    pdfbox/branches/issue45/   (props changed)
    pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
    pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
    pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest

Propchange: pdfbox/branches/issue45/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun May 19 16:46:17 2019
@@ -1,3 +1,3 @@
 /pdfbox/branches/no-awt:1618517-1621410
 /pdfbox/no-awt:1618514-1618516
-/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501
+/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510

Modified: pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun May 19 16:46:17 2019
@@ -362,69 +362,58 @@ public class CMapParser
             }
             byte[] startCode = (byte[]) nextToken;
             byte[] endCode = (byte[]) parseNextToken(cmapStream);
-            if (!checkBoundaries(startCode, endCode))
+            int start = CMap.toInt(startCode, startCode.length);
+            int end = CMap.toInt(endCode, endCode.length);
+            // end has to be bigger than start or equal
+            // the range can not represent more than 256 values
+            if (end < start || (end - start) > 255)
             {
                 // PDFBOX-4550: likely corrupt stream
                 break;
             }
             nextToken = parseNextToken(cmapStream);
-            List<byte[]> array = null;
-            byte[] tokenBytes;
             if (nextToken instanceof List<?>)
             {
-                array = (List<byte[]>) nextToken;
-                if (array.isEmpty())
+                List<byte[]> array = (List<byte[]>) nextToken;
+                // ignore empty and malformed arrays
+                if (!array.isEmpty() && array.size() >= end - start)
                 {
-                    continue;
+                    addMappingFrombfrange(result, startCode, array);
                 }
-                tokenBytes = array.get(0);
             }
-            else
-            {
-                tokenBytes = (byte[]) nextToken;
-            }
-            if (tokenBytes == null || tokenBytes.length == 0)
+            // PDFBOX-3807: ignore null
+            else if (nextToken instanceof byte[])
             {
+                byte[] tokenBytes = (byte[]) nextToken;
                 // PDFBOX-3450: ignore <>
-                // PDFBOX-3807: ignore null
-                continue;
-            }
-            boolean done = false;
-
-            int arrayIndex = 0;
-            while (!done)
-            {
-                if (compare(startCode, endCode) >= 0)
-                {
-                    done = true;
-                }
-                String value = createStringFromBytes(tokenBytes);
-                result.addCharMapping(startCode, value);
-                increment(startCode);
-
-                if (array == null)
+                if (tokenBytes.length > 0)
                 {
-                    increment(tokenBytes);
-                }
-                else
-                {
-                    arrayIndex++;
-                    if (arrayIndex < array.size())
-                    {
-                        tokenBytes = array.get(arrayIndex);
-                    }
+                    addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes);
                 }
             }
         }
     }
 
-    private boolean checkBoundaries(byte[] startCode, byte[] endCode)
+    private void addMappingFrombfrange(CMap cmap, byte[] startCode, List<byte[]> tokenBytesList)
     {
-        int start = CMap.toInt(startCode, startCode.length);
-        int end = CMap.toInt(endCode, endCode.length);
-        // end has to be bigger than start or equal
-        // the range can not represent more that 255 values
-        return end >= start && (end - start) < 256;
+        for (byte[] tokenBytes : tokenBytesList)
+        {
+            String value = createStringFromBytes(tokenBytes);
+            cmap.addCharMapping(startCode, value);
+            increment(startCode);
+        }
+    }
+
+    private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values,
+            byte[] tokenBytes)
+    {
+        for (int i = 0; i < values; i++)
+        {
+            String value = createStringFromBytes(tokenBytes);
+            cmap.addCharMapping(startCode, value);
+            increment(startCode);
+            increment(tokenBytes);
+        }
     }
 
     /**
@@ -728,27 +717,6 @@ public class CMapParser
         return new String(bytes, bytes.length == 1 ? Charsets.ISO_8859_1 : Charsets.UTF_16BE);
     }
 
-    private int compare(byte[] first, byte[] second)
-    {
-        for (int i = 0; i < first.length; i++)
-        {
-            if (first[i] == second[i])
-            {
-                continue;
-            }
-
-            if ((first[i] & 0xFF) < (second[i] & 0xFF))
-            {
-                return -1;
-            }
-            else
-            {
-                return 1;
-            }
-        }
-        return 0;
-    }
-
     /**
      * Internal class.
      */

Modified: pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun May 19 16:46:17 2019
@@ -45,19 +45,31 @@ public class TestCMapParser extends Test
 
         // char mappings
         byte[] bytes1 = {0, 1};
-        assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.toUnicode(toInt(bytes1,bytes1.length)));
+        assertEquals("bytes 00 01 from bfrange <0001> <0005> <0041>", "A",
+                cMap.toUnicode(toInt(bytes1, bytes1.length)));
 
         byte[] bytes2 = {1, 00};
         String str2 = "0";
-        assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.toUnicode(toInt(bytes2,bytes2.length)));
+        assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2,
+                cMap.toUnicode(toInt(bytes2, bytes2.length)));
 
-        byte[] bytes3 = {0, 10};
-        String str3 = "*";
-        assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.toUnicode(toInt(bytes3,bytes3.length)));
-
-        byte[] bytes4 = {1, 10};
-        String str4 = "+";
-        assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, cMap.toUnicode(toInt(bytes4,bytes4.length)));
+        byte[] bytes3 = { 1, 32 };
+        assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", "P",
+                cMap.toUnicode(toInt(bytes3, bytes3.length)));
+
+        byte[] bytes4 = { 1, 33 };
+        assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", "R",
+                cMap.toUnicode(toInt(bytes4, bytes4.length)));
+
+        byte[] bytes5 = { 0, 10 };
+        String str5 = "*";
+        assertEquals("bytes 00 0A from bfchar <000A> <002A>", str5,
+                cMap.toUnicode(toInt(bytes5, bytes5.length)));
+
+        byte[] bytes6 = { 1, 10 };
+        String str6 = "+";
+        assertEquals("bytes 01 0A from bfchar <010A> <002B>", str6,
+                cMap.toUnicode(toInt(bytes6, bytes6.length)));
 
         // CID mappings
         int cid1 = 65;

Modified: pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest (original)
+++ pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest Sun May 19 16:46:17 2019
@@ -26,9 +26,10 @@ endcodespacerange
 <010A> <002B>
 endbfchar
 
-2 beginbfrange
-<0001> <0009> <0041>
+3 beginbfrange
+<0001> <0005> <0041>
 <0100> <0109> <0030>
+<0120> <0122> [<0050> <0052> <0054>]
 endbfrange
 
 3 begincidchar