You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/23 17:52:38 UTC

svn commit: r1641221 - in /pdfbox/trunk/fontbox/src: main/java/org/apache/fontbox/cmap/CMapParser.java test/java/org/apache/fontbox/cmap/TestCMapParser.java test/resources/cmap/CMapNoWhitespace

Author: lehmi
Date: Sun Nov 23 16:52:37 2014
New Revision: 1641221

URL: http://svn.apache.org/r1641221
Log:
PDFBOX-1648: improve the CMapParser to detect missing delimiters as proposed by May Gilead and Kevin Pearcey

Added:
    pdfbox/trunk/fontbox/src/test/resources/cmap/CMapNoWhitespace
      - copied unchanged from r1641219, pdfbox/branches/1.8/fontbox/src/test/resources/cmap/CMapNoWhitespace
Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
    pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1641221&r1=1641220&r2=1641221&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sun Nov 23 16:52:37 2014
@@ -525,11 +525,15 @@ public class CMapParser
             StringBuffer buffer = new StringBuffer();
             int stringByte = is.read();
 
-            while (!isWhitespaceOrEOF(stringByte))
+            while (!isWhitespaceOrEOF(stringByte) && !isDelimiter(stringByte))
             {
                 buffer.append((char) stringByte);
                 stringByte = is.read();
             }
+            if (isDelimiter( stringByte)) 
+            {
+                is.unread(stringByte);
+            }
             retval = new LiteralName(buffer.toString());
             break;
         }
@@ -578,12 +582,12 @@ public class CMapParser
 
             // newline separator may be missing in malformed CMap files
             // see PDFBOX-2035
-            while (!isWhitespaceOrEOF(nextByte) && nextByte != '<')
+            while (!isWhitespaceOrEOF(nextByte) && !isDelimiter(nextByte) && !Character.isDigit(nextByte))
             {
                 buffer.append((char) nextByte);
                 nextByte = is.read();
             }
-            if (nextByte == '<')
+            if (isDelimiter(nextByte) || Character.isDigit(nextByte))
             {
                 is.unread(nextByte);
             }
@@ -610,6 +614,27 @@ public class CMapParser
         return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
     }
 
+    /** Is this a standard PDF delimiter character? */
+    private boolean isDelimiter(int aByte) 
+    {
+        switch (aByte) 
+        {
+            case '(':
+            case ')':
+            case '<':
+            case '>':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '/':
+            case '%':
+                return true;
+            default:
+                return false;
+        }
+    }
+
     private void increment(byte[] data)
     {
         increment(data, data.length - 1);

Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1641221&r1=1641220&r2=1641221&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sun Nov 23 16:52:37 2014
@@ -81,4 +81,18 @@ public class TestCMapParser extends Test
         }
         return code;
     }
+
+    /**
+     * Tests the parser against a valid, but poorly formatted CMap file.
+     * @throws IOException If something went wrong
+     */
+    public void testParserWithPoorWhitespace() throws IOException 
+    {
+        File inDir = new File("src/test/resources/cmap");
+
+        CMapParser parser = new CMapParser();
+        CMap cMap = parser.parse( new FileInputStream(new File(inDir,"CMapNoWhitespace")));
+
+        assertNotNull("Failed to parse nasty CMap file", cMap);
+    }
 }