You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/04/20 13:06:27 UTC

svn commit: r1588736 - in /pdfbox/trunk: fontbox/src/main/java/org/apache/fontbox/cmap/ fontbox/src/test/java/org/apache/fontbox/cmap/ fontbox/src/test/resources/cmap/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/

Author: lehmi
Date: Sun Apr 20 11:06:26 2014
New Revision: 1588736

URL: http://svn.apache.org/r1588736
Log:
PDFBOX-2035: be more lenient when parsing CMaps

Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
    pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
    pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Apr 20 11:06:26 2014
@@ -49,6 +49,7 @@ public class CMapParser
     private static final String END_BASE_FONT_RANGE = "endbfrange";
     private static final String END_CID_CHAR = "endcidchar";
     private static final String END_CID_RANGE = "endcidrange";
+    private static final String END_CMAP = "endcmap";
 
     private static final String WMODE = "WMode";
     private static final String CMAP_NAME = "CMapName";
@@ -131,6 +132,11 @@ public class CMapParser
                     CMap useCMap = parse(resourceRoot, useStream);
                     result.useCmap(useCMap);
                 }
+                else if (op.op.equals(END_CMAP))
+                {
+                    // end of CMap reached; stop reading because nothing of interest follows
+                    break;
+                }
                 else if (op.op.equals(BEGIN_CODESPACE_RANGE))
                 {
                     Number cosCount = (Number) previousToken;
@@ -482,7 +488,9 @@ public class CMapParser
                     {
                         intValue = 10 + theNextByte - 'a';
                     }
-                    else if (theNextByte == 0x20)
+                    // all kinds of whitespace may occur in malformed CMap files
+                    // see PDFBOX-2035
+                    else if (isWhitespaceOrEOF(theNextByte))
                     {
                         // skipping whitespaces
                         theNextByte = is.read();
@@ -569,11 +577,17 @@ public class CMapParser
             buffer.append((char) nextByte);
             nextByte = is.read();
 
-            while (!isWhitespaceOrEOF(nextByte))
+            // newline separator may be missing in malformed CMap files
+            // see PDFBOX-2035
+            while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
             {
                 buffer.append((char) nextByte);
                 nextByte = is.read();
             }
+            if (nextByte == '<')
+            {
+                is.unread(nextByte);
+            }
             retval = new Operator(buffer.toString());
 
             break;

Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Apr 20 11:06:26 2014
@@ -19,13 +19,13 @@ package org.apache.fontbox.cmap;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.util.Arrays;
 
 import junit.framework.TestCase;
 
 /**
  * This will test the CMapParser implementation.
  *
- * @version $Revision$
  */
 public class TestCMapParser extends TestCase 
 {
@@ -42,33 +42,42 @@ public class TestCMapParser extends Test
         CMapParser parser = new CMapParser();
         CMap cMap = parser.parse( resourceDir, new FileInputStream(new File(inDir,"CMapTest")));
         
+        // code space range
+        assertEquals("codeSpaceRanges size", 1, cMap.getCodeSpaceRanges().size());
+        final byte[] expectedStart = {0, 0}; // 00 00
+        final byte[] expectedEnd = {2, -1}; // 02 FF
+        final byte[] actualStart = cMap.getCodeSpaceRanges().get(0).getStart();
+        final byte[] actualEnd = cMap.getCodeSpaceRanges().get(0).getEnd();
+        assertTrue("codeSpaceRange start", Arrays.equals(expectedStart, actualStart));
+        assertTrue("codeSpaceRange end", Arrays.equals(expectedEnd, actualEnd));
+        
         // char mappings
         byte[] bytes1 = {0,1};
-        assertTrue("A".equals(cMap.lookup(bytes1, 0, 2)));
+        assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.lookup(bytes1, 0, 2));
 
         byte[] bytes2 = {1,00};
         String str2 = "0";
-        assertTrue(str2.equals(cMap.lookup(bytes2, 0, 2)));
+        assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.lookup(bytes2, 0, 2));
 
         byte[] bytes3 = {0,10};
         String str3 = "*";
-        assertTrue(str3.equals(cMap.lookup(bytes3, 0, 2)));
+        assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.lookup(bytes3, 0, 2));
 
         byte[] bytes4 = {1,10};
         String str4 = "+";
-        assertTrue(str4.equals(cMap.lookup(bytes4, 0, 2)));
+        assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, cMap.lookup(bytes4, 0, 2));
 
         // CID mappings
         int cid1 = 65;
-        assertTrue("A".equals(cMap.lookupCID(cid1)));
+        assertEquals("CID 65 from cidrange <0000> <00ff> 0 ", "A", cMap.lookupCID(cid1));
 
         int cid2 = 280;
         String strCID2 = "\u0118";
-        assertTrue(strCID2.equals(cMap.lookupCID(cid2)));
+        assertEquals("CID 280 from cidrange <0100> <01ff> 256", strCID2, cMap.lookupCID(cid2));
         
         int cid3 = 520;
         String strCID3 = "\u0208";
-        assertTrue(strCID3.equals(cMap.lookupCID(cid3)));
+        assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.lookupCID(cid3));
     }
 
 }

Modified: pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest (original)
+++ pdfbox/trunk/fontbox/src/test/resources/cmap/CMapTest Sun Apr 20 11:06:26 2014
@@ -18,8 +18,7 @@
 %%
 %%EndComments
 
-1 begincodespacerange
-  <0000> <02FF>
+1 begincodespacerange<0000> <02FF>
 endcodespacerange
 
 2 beginbfchar

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1588736&r1=1588735&r2=1588736&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Apr 20 11:06:26 2014
@@ -24,6 +24,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.fontbox.afm.FontMetric;
 import org.apache.fontbox.cmap.CMap;
 import org.apache.fontbox.cmap.CMapParser;
@@ -49,6 +51,11 @@ public abstract class PDFont implements 
 {
 
     /**
+     * Log instance.
+     */
+    private static final Log LOG = LogFactory.getLog(PDFont.class);
+
+    /**
      * The cos dictionary for this font.
      */
     protected COSDictionary font;
@@ -488,6 +495,7 @@ public abstract class PDFont implements 
             }
             catch (IOException exception)
             {
+                LOG.error("An error occurs while reading a CMap", exception);
             }
         }
         return targetCmap;