You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2019/05/19 16:46:17 UTC
svn commit: r1859512 - in /pdfbox/branches/issue45: ./
fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
fontbox/src/test/resources/cmap/CMapTest
Author: lehmi
Date: Sun May 19 16:46:17 2019
New Revision: 1859512
URL: http://svn.apache.org/viewvc?rev=1859512&view=rev
Log:
PDFBOX-4550: refactored parsing of bfranges, eliminated buggy compare method
Modified:
pdfbox/branches/issue45/ (props changed)
pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest
Propchange: pdfbox/branches/issue45/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun May 19 16:46:17 2019
@@ -1,3 +1,3 @@
/pdfbox/branches/no-awt:1618517-1621410
/pdfbox/no-awt:1618514-1618516
-/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501
+/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1789414,1790745,1794073,1794090,1794620,1794753,1794859,1794891,1800566,1812426,1814226,1826836,1859501,1859510
Modified: pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/branches/issue45/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun May 19 16:46:17 2019
@@ -362,69 +362,58 @@ public class CMapParser
}
byte[] startCode = (byte[]) nextToken;
byte[] endCode = (byte[]) parseNextToken(cmapStream);
- if (!checkBoundaries(startCode, endCode))
+ int start = CMap.toInt(startCode, startCode.length);
+ int end = CMap.toInt(endCode, endCode.length);
+ // end has to be bigger than start or equal
+ // the range can not represent more than 256 values
+ if (end < start || (end - start) > 255)
{
// PDFBOX-4550: likely corrupt stream
break;
}
nextToken = parseNextToken(cmapStream);
- List<byte[]> array = null;
- byte[] tokenBytes;
if (nextToken instanceof List<?>)
{
- array = (List<byte[]>) nextToken;
- if (array.isEmpty())
+ List<byte[]> array = (List<byte[]>) nextToken;
+ // ignore empty and malformed arrays
+ if (!array.isEmpty() && array.size() >= end - start)
{
- continue;
+ addMappingFrombfrange(result, startCode, array);
}
- tokenBytes = array.get(0);
}
- else
- {
- tokenBytes = (byte[]) nextToken;
- }
- if (tokenBytes == null || tokenBytes.length == 0)
+ // PDFBOX-3807: ignore null
+ else if (nextToken instanceof byte[])
{
+ byte[] tokenBytes = (byte[]) nextToken;
// PDFBOX-3450: ignore <>
- // PDFBOX-3807: ignore null
- continue;
- }
- boolean done = false;
-
- int arrayIndex = 0;
- while (!done)
- {
- if (compare(startCode, endCode) >= 0)
- {
- done = true;
- }
- String value = createStringFromBytes(tokenBytes);
- result.addCharMapping(startCode, value);
- increment(startCode);
-
- if (array == null)
+ if (tokenBytes.length > 0)
{
- increment(tokenBytes);
- }
- else
- {
- arrayIndex++;
- if (arrayIndex < array.size())
- {
- tokenBytes = array.get(arrayIndex);
- }
+ addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes);
}
}
}
}
- private boolean checkBoundaries(byte[] startCode, byte[] endCode)
+ private void addMappingFrombfrange(CMap cmap, byte[] startCode, List<byte[]> tokenBytesList)
{
- int start = CMap.toInt(startCode, startCode.length);
- int end = CMap.toInt(endCode, endCode.length);
- // end has to be bigger than start or equal
- // the range can not represent more than 256 values
- return end >= start && (end - start) < 256;
+ for (byte[] tokenBytes : tokenBytesList)
+ {
+ String value = createStringFromBytes(tokenBytes);
+ cmap.addCharMapping(startCode, value);
+ increment(startCode);
+ }
+ }
+
+ private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values,
+ byte[] tokenBytes)
+ {
+ for (int i = 0; i < values; i++)
+ {
+ String value = createStringFromBytes(tokenBytes);
+ cmap.addCharMapping(startCode, value);
+ increment(startCode);
+ increment(tokenBytes);
+ }
}
/**
@@ -728,27 +717,6 @@ public class CMapParser
return new String(bytes, bytes.length == 1 ? Charsets.ISO_8859_1 : Charsets.UTF_16BE);
}
- private int compare(byte[] first, byte[] second)
- {
- for (int i = 0; i < first.length; i++)
- {
- if (first[i] == second[i])
- {
- continue;
- }
-
- if ((first[i] & 0xFF) < (second[i] & 0xFF))
- {
- return -1;
- }
- else
- {
- return 1;
- }
- }
- return 0;
- }
-
/**
* Internal class.
*/
Modified: pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/branches/issue45/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun May 19 16:46:17 2019
@@ -45,19 +45,31 @@ public class TestCMapParser extends Test
// char mappings
byte[] bytes1 = {0, 1};
- assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.toUnicode(toInt(bytes1,bytes1.length)));
+ assertEquals("bytes 00 01 from bfrange <0001> <0005> <0041>", "A",
+ cMap.toUnicode(toInt(bytes1, bytes1.length)));
byte[] bytes2 = {1, 00};
String str2 = "0";
- assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.toUnicode(toInt(bytes2,bytes2.length)));
+ assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2,
+ cMap.toUnicode(toInt(bytes2, bytes2.length)));
- byte[] bytes3 = {0, 10};
- String str3 = "*";
- assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.toUnicode(toInt(bytes3,bytes3.length)));
-
- byte[] bytes4 = {1, 10};
- String str4 = "+";
- assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, cMap.toUnicode(toInt(bytes4,bytes4.length)));
+ byte[] bytes3 = { 1, 32 };
+ assertEquals("bytes 01 20 from bfrange <0120> <0122> [<0050> <0052> <0054>]", "P",
+ cMap.toUnicode(toInt(bytes3, bytes3.length)));
+
+ byte[] bytes4 = { 1, 33 };
+ assertEquals("bytes 01 21 from bfrange <0120> <0122> [<0050> <0052> <0054>]", "R",
+ cMap.toUnicode(toInt(bytes4, bytes4.length)));
+
+ byte[] bytes5 = { 0, 10 };
+ String str5 = "*";
+ assertEquals("bytes 00 0A from bfchar <000A> <002A>", str5,
+ cMap.toUnicode(toInt(bytes5, bytes5.length)));
+
+ byte[] bytes6 = { 1, 10 };
+ String str6 = "+";
+ assertEquals("bytes 01 0A from bfchar <010A> <002B>", str6,
+ cMap.toUnicode(toInt(bytes6, bytes6.length)));
// CID mappings
int cid1 = 65;
Modified: pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest
URL: http://svn.apache.org/viewvc/pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest?rev=1859512&r1=1859511&r2=1859512&view=diff
==============================================================================
--- pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest (original)
+++ pdfbox/branches/issue45/fontbox/src/test/resources/cmap/CMapTest Sun May 19 16:46:17 2019
@@ -26,9 +26,10 @@ endcodespacerange
<010A> <002B>
endbfchar
-2 beginbfrange
-<0001> <0009> <0041>
+3 beginbfrange
+<0001> <0005> <0041>
<0100> <0109> <0030>
+<0120> <0122> [<0050> <0052> <0054>]
endbfrange
3 begincidchar