You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/23 17:45:52 UTC
svn commit: r1641219 - in /pdfbox/branches/1.8/fontbox/src:
main/java/org/apache/fontbox/cmap/CMapParser.java
test/java/org/apache/fontbox/cmap/TestCMapParser.java
test/resources/cmap/CMapNoWhitespace
Author: lehmi
Date: Sun Nov 23 16:45:51 2014
New Revision: 1641219
URL: http://svn.apache.org/r1641219
Log:
PDFBOX-1648: improve the CMapParser to detect missing delimiters as proposed by May Gilead and Kevin Pearcey
Added:
pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
Modified:
pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
Modified: pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1641219&r1=1641218&r2=1641219&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Nov 23 16:45:51 2014
@@ -32,7 +32,7 @@ import org.apache.fontbox.util.ResourceL
/**
* This will parse a CMap stream.
*
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @author Ben Litchfield
*
*/
public class CMapParser
@@ -526,11 +526,15 @@ public class CMapParser
StringBuffer buffer = new StringBuffer();
int stringByte = is.read();
- while (!isWhitespaceOrEOF(stringByte))
+ while (!isWhitespaceOrEOF(stringByte) && !isDelimiter(stringByte))
{
buffer.append((char) stringByte);
stringByte = is.read();
}
+ if (isDelimiter( stringByte))
+ {
+ is.unread(stringByte);
+ }
retval = new LiteralName(buffer.toString());
break;
}
@@ -579,12 +583,12 @@ public class CMapParser
// newline separator may be missing in malformed CMap files
// see PDFBOX-2035
- while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
+ while (!isWhitespaceOrEOF(nextByte) && !isDelimiter(nextByte) && !Character.isDigit(nextByte))
{
buffer.append((char) nextByte);
nextByte = is.read();
}
- if (nextByte == '<')
+ if (isDelimiter(nextByte) || Character.isDigit(nextByte))
{
is.unread(nextByte);
}
@@ -611,6 +615,27 @@ public class CMapParser
return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
}
+ /** Is this a standard PDF delimiter character? */
+ private boolean isDelimiter(int aByte)
+ {
+ switch (aByte)
+ {
+ case '(':
+ case ')':
+ case '<':
+ case '>':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '/':
+ case '%':
+ return true;
+ default:
+ return false;
+ }
+ }
+
private void increment(byte[] data)
{
increment(data, data.length - 1);
Modified: pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1641219&r1=1641218&r2=1641219&view=diff
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/branches/1.8/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Nov 23 16:45:51 2014
@@ -80,4 +80,20 @@ public class TestCMapParser extends Test
assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.lookupCID(cid3));
}
+ /**
+ * Tests the parser against a valid, but poorly formatted CMap file.
+ * @throws IOException If something went wrong
+ */
+ public void testParserWithPoorWhitespace() throws IOException
+ {
+ final String resourceDir= "src/test/resources/cmap";
+ File inDir = new File(resourceDir);
+
+ CMapParser parser = new CMapParser();
+ CMap cMap = parser.parse( resourceDir, new FileInputStream(new File(inDir,"CMapNoWhitespace")));
+
+ assertNotNull("Failed to parse nasty CMap file", cMap);
+ }
+
+
}
Added: pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace?rev=1641219&view=auto
==============================================================================
--- pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace (added)
+++ pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace Sun Nov 23 16:45:51 2014
@@ -0,0 +1,32 @@
+%% Licensed to the Apache Software Foundation (ASF) under one or more
+%% contributor license agreements. See the NOTICE file distributed with
+%% this work for additional information regarding copyright ownership.
+%% The ASF licenses this file to You under the Apache License, Version 2.0
+%% (the "License"); you may not use this file except in compliance with
+%% the License. You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% This file isn't a valid CMap file, but it contains everything that
+%% is needed to test the CMapParser
+%%
+%%EndComments
+
+/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo <</Registry (DDACTR+F1+0) /Ordering (0) /Supplement 0 >> def/CMapName /DDACTR+F1+0 def
+/CMapType 2 def1 begincodespacerange <0000> <FFFF> endcodespacerange7 beginbfchar<0003> <0020>
+<0011> <002e>
+<001a> <0037>
+<001b> <0038>
+<0026> <0043>
+<0083> <00b0>
+<00ed> <2212>
+endbfcharendcmap CMapName currentdict /CMap defineresource pop end end
+
+%%EndResource
+%%EOF