You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/04/20 13:06:27 UTC
svn commit: r1588736 - in /pdfbox/trunk:
fontbox/src/main/java/org/apache/fontbox/cmap/
fontbox/src/test/java/org/apache/fontbox/cmap/
fontbox/src/test/resources/cmap/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/
Author: lehmi
Date: Sun Apr 20 11:06:26 2014
New Revision: 1588736
URL: http://svn.apache.org/r1588736
Log:
PDFBOX-2035: be more lenient when parsing CMaps
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Apr 20 11:06:26 2014
@@ -49,6 +49,7 @@ public class CMapParser
private static final String END_BASE_FONT_RANGE = "endbfrange";
private static final String END_CID_CHAR = "endcidchar";
private static final String END_CID_RANGE = "endcidrange";
+ private static final String END_CMAP = "endcmap";
private static final String WMODE = "WMode";
private static final String CMAP_NAME = "CMapName";
@@ -131,6 +132,11 @@ public class CMapParser
CMap useCMap = parse(resourceRoot, useStream);
result.useCmap(useCMap);
}
+ else if (op.op.equals(END_CMAP))
+ {
+ // end of CMap reached; stop reading, as nothing of interest follows
+ break;
+ }
else if (op.op.equals(BEGIN_CODESPACE_RANGE))
{
Number cosCount = (Number) previousToken;
@@ -482,7 +488,9 @@ public class CMapParser
{
intValue = 10 + theNextByte - 'a';
}
- else if (theNextByte == 0x20)
+ // all kinds of whitespace may occur in malformed CMap files
+ // see PDFBOX-2035
+ else if (isWhitespaceOrEOF(theNextByte))
{
// skipping whitespace
theNextByte = is.read();
@@ -569,11 +577,17 @@ public class CMapParser
buffer.append((char) nextByte);
nextByte = is.read();
- while (!isWhitespaceOrEOF(nextByte))
+ // newline separator may be missing in malformed CMap files
+ // see PDFBOX-2035
+ while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
{
buffer.append((char) nextByte);
nextByte = is.read();
}
+ if (nextByte == '<')
+ {
+ is.unread(nextByte);
+ }
retval = new Operator(buffer.toString());
break;
Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Apr 20 11:06:26 2014
@@ -19,13 +19,13 @@ package org.apache.fontbox.cmap;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.util.Arrays;
import junit.framework.TestCase;
/**
* This will test the CMapParser implementation.
*
- * @version $Revision$
*/
public class TestCMapParser extends TestCase
{
@@ -42,33 +42,42 @@ public class TestCMapParser extends Test
CMapParser parser = new CMapParser();
CMap cMap = parser.parse( resourceDir, new FileInputStream(new File(inDir,"CMapTest")));
+ // code space range
+ assertEquals("codeSpaceRanges size", 1, cMap.getCodeSpaceRanges().size());
+ final byte[] expectedStart = {0, 0}; // 00 00
+ final byte[] expectedEnd = {2, -1}; // 02 FF
+ final byte[] actualStart = cMap.getCodeSpaceRanges().get(0).getStart();
+ final byte[] actualEnd = cMap.getCodeSpaceRanges().get(0).getEnd();
+ assertTrue("codeSpaceRange start", Arrays.equals(expectedStart, actualStart));
+ assertTrue("codeSpaceRange end", Arrays.equals(expectedEnd, actualEnd));
+
// char mappings
byte[] bytes1 = {0,1};
- assertTrue("A".equals(cMap.lookup(bytes1, 0, 2)));
+ assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.lookup(bytes1, 0, 2));
byte[] bytes2 = {1,00};
String str2 = "0";
- assertTrue(str2.equals(cMap.lookup(bytes2, 0, 2)));
+ assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.lookup(bytes2, 0, 2));
byte[] bytes3 = {0,10};
String str3 = "*";
- assertTrue(str3.equals(cMap.lookup(bytes3, 0, 2)));
+ assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.lookup(bytes3, 0, 2));
byte[] bytes4 = {1,10};
String str4 = "+";
- assertTrue(str4.equals(cMap.lookup(bytes4, 0, 2)));
+ assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, cMap.lookup(bytes4, 0, 2));
// CID mappings
int cid1 = 65;
- assertTrue("A".equals(cMap.lookupCID(cid1)));
+ assertEquals("CID 65 from cidrange <0000> <00ff> 0 ", "A", cMap.lookupCID(cid1));
int cid2 = 280;
String strCID2 = "\u0118";
- assertTrue(strCID2.equals(cMap.lookupCID(cid2)));
+ assertEquals("CID 280 from cidrange <0100> <01ff> 256", strCID2, cMap.lookupCID(cid2));
int cid3 = 520;
String strCID3 = "\u0208";
- assertTrue(strCID3.equals(cMap.lookupCID(cid3)));
+ assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.lookupCID(cid3));
}
}
Modified: pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest (original)
+++ pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest Sun Apr 20 11:06:26 2014
@@ -18,8 +18,7 @@
%%
%%EndComments
-1 begincodespacerange
- <0000> <02FF>
+1 begincodespacerange<0000> <02FF>
endcodespacerange
2 beginbfchar
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Apr 20 11:06:26 2014
@@ -24,6 +24,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.cmap.CMap;
import org.apache.fontbox.cmap.CMapParser;
@@ -49,6 +51,11 @@ public abstract class PDFont implements
{
/**
+ * Log instance.
+ */
+ private static final Log LOG = LogFactory.getLog(PDFont.class);
+
+ /**
* The cos dictionary for this font.
*/
protected COSDictionary font;
@@ -488,6 +495,7 @@ public abstract class PDFont implements
}
catch (IOException exception)
{
+ LOG.error("An error occurs while reading a CMap", exception);
}
}
return targetCmap;