You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/23 17:52:38 UTC
svn commit: r1641221 - in /pdfbox/trunk/fontbox/src:
main/java/org/apache/fontbox/cmap/CMapParser.java
test/java/org/apache/fontbox/cmap/TestCMapParser.java
test/resources/cmap/CMapNoWhitespace
Author: lehmi
Date: Sun Nov 23 16:52:37 2014
New Revision: 1641221
URL: http://svn.apache.org/r1641221
Log:
PDFBOX-1648: improve the CMapParser to detect missing delimiters as proposed by May Gilead and Kevin Pearcey
Added:
pdfbox/trunk/fontbox/src/test/resources/cmap/CMapNoWhitespace
- copied unchanged from r1641219, pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1641221&r1=1641220&r2=1641221&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Nov 23 16:52:37 2014
@@ -525,11 +525,15 @@ public class CMapParser
StringBuffer buffer = new StringBuffer();
int stringByte = is.read();
- while (!isWhitespaceOrEOF(stringByte))
+ while (!isWhitespaceOrEOF(stringByte) && !isDelimiter(stringByte))
{
buffer.append((char) stringByte);
stringByte = is.read();
}
+ if (isDelimiter( stringByte))
+ {
+ is.unread(stringByte);
+ }
retval = new LiteralName(buffer.toString());
break;
}
@@ -578,12 +582,12 @@ public class CMapParser
// newline separator may be missing in malformed CMap files
// see PDFBOX-2035
- while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
+ while (!isWhitespaceOrEOF(nextByte) && !isDelimiter(nextByte) && !Character.isDigit(nextByte))
{
buffer.append((char) nextByte);
nextByte = is.read();
}
- if (nextByte == '<')
+ if (isDelimiter(nextByte) || Character.isDigit(nextByte))
{
is.unread(nextByte);
}
@@ -610,6 +614,27 @@ public class CMapParser
return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
}
+ /** Is this a standard PDF delimiter character? */
+ private boolean isDelimiter(int aByte)
+ {
+ switch (aByte)
+ {
+ case '(':
+ case ')':
+ case '<':
+ case '>':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '/':
+ case '%':
+ return true;
+ default:
+ return false;
+ }
+ }
+
private void increment(byte[] data)
{
increment(data, data.length - 1);
Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1641221&r1=1641220&r2=1641221&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Nov 23 16:52:37 2014
@@ -81,4 +81,18 @@ public class TestCMapParser extends Test
}
return code;
}
+
+ /**
+ * Tests the parser against a valid, but poorly formatted CMap file.
+ * @throws IOException If something went wrong
+ */
+ public void testParserWithPoorWhitespace() throws IOException
+ {
+ File inDir = new File("src/test/resources/cmap");
+
+ CMapParser parser = new CMapParser();
+ CMap cMap = parser.parse( new FileInputStream(new File(inDir,"CMapNoWhitespace")));
+
+ assertNotNull("Failed to parse nasty CMap file", cMap);
+ }
}